g_mirror.c revision 132907
1/*-
2 * Copyright (c) 2004 Pawel Jakub Dawidek <pjd@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/geom/mirror/g_mirror.c 132907 2004-07-31 00:37:14Z pjd $");
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/kernel.h>
33#include <sys/module.h>
34#include <sys/limits.h>
35#include <sys/lock.h>
36#include <sys/mutex.h>
37#include <sys/bio.h>
38#include <sys/sysctl.h>
39#include <sys/malloc.h>
40#include <sys/bitstring.h>
41#include <vm/uma.h>
42#include <machine/atomic.h>
43#include <geom/geom.h>
44#include <sys/proc.h>
45#include <sys/kthread.h>
46#include <geom/mirror/g_mirror.h>
47
48
49static MALLOC_DEFINE(M_MIRROR, "mirror data", "GEOM_MIRROR Data");
50
51SYSCTL_DECL(_kern_geom);
52SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0, "GEOM_MIRROR stuff");
53u_int g_mirror_debug = 0;
54SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RW, &g_mirror_debug, 0,
55    "Debug level");
56static u_int g_mirror_sync_block_size = 131072;
57SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_block_size, CTLFLAG_RW,
58    &g_mirror_sync_block_size, 0, "Synchronization block size");
59static u_int g_mirror_timeout = 8;
60SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RW, &g_mirror_timeout,
61    0, "Time to wait on all mirror components");
62static u_int g_mirror_reqs_per_sync = 5;
63SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, reqs_per_sync, CTLFLAG_RW,
64    &g_mirror_reqs_per_sync, 0,
65    "Number of regular I/O requests per synchronization request");
66static u_int g_mirror_syncs_per_sec = 100;
67SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, syncs_per_sec, CTLFLAG_RW,
68    &g_mirror_syncs_per_sec, 0,
69    "Number of synchronizations requests per second");
70
71#define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
72	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
73	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
74	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
75} while (0)
76
77
78static int g_mirror_destroy_geom(struct gctl_req *req, struct g_class *mp,
79    struct g_geom *gp);
80static g_taste_t g_mirror_taste;
81
82struct g_class g_mirror_class = {
83	.name = G_MIRROR_CLASS_NAME,
84	.ctlreq = g_mirror_config,
85	.taste = g_mirror_taste,
86	.destroy_geom = g_mirror_destroy_geom
87};
88
89
90static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
91static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
92static void g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force);
93static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
94    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
95static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
96
97
98static const char *
99g_mirror_disk_state2str(int state)
100{
101
102	switch (state) {
103	case G_MIRROR_DISK_STATE_NONE:
104		return ("NONE");
105	case G_MIRROR_DISK_STATE_NEW:
106		return ("NEW");
107	case G_MIRROR_DISK_STATE_ACTIVE:
108		return ("ACTIVE");
109	case G_MIRROR_DISK_STATE_STALE:
110		return ("STALE");
111	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
112		return ("SYNCHRONIZING");
113	case G_MIRROR_DISK_STATE_DISCONNECTED:
114		return ("DISCONNECTED");
115	case G_MIRROR_DISK_STATE_DESTROY:
116		return ("DESTROY");
117	default:
118		return ("INVALID");
119	}
120}
121
122static const char *
123g_mirror_device_state2str(int state)
124{
125
126	switch (state) {
127	case G_MIRROR_DEVICE_STATE_STARTING:
128		return ("STARTING");
129	case G_MIRROR_DEVICE_STATE_RUNNING:
130		return ("RUNNING");
131	default:
132		return ("INVALID");
133	}
134}
135
136static const char *
137g_mirror_get_diskname(struct g_mirror_disk *disk)
138{
139
140	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
141		return ("[unknown]");
142	return (disk->d_name);
143}
144
145/*
146 * --- Events handling functions ---
147 * Events in geom_mirror are used to handle disk and device state changes
148 * from a single thread, which simplifies locking.
149 */
150static void
151g_mirror_event_free(struct g_mirror_event *ep)
152{
153
154	free(ep, M_MIRROR);
155}
156
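/*
 * Queue an event for the worker thread.  Unless the G_MIRROR_EVENT_DONTWAIT
 * flag is set, sleep until the worker has processed the event and return its
 * error code.
 */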
157int
158g_mirror_event_send(void *arg, int state, int flags)
159{
160	struct g_mirror_softc *sc;
161	struct g_mirror_disk *disk;
162	struct g_mirror_event *ep;
163	int error;
164
165	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
166	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
167	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
168		disk = NULL;
169		sc = arg;
170	} else {
171		disk = arg;
172		sc = disk->d_softc;
173	}
174	ep->e_disk = disk;
175	ep->e_state = state;
176	ep->e_flags = flags;
177	ep->e_error = 0;
178	mtx_lock(&sc->sc_events_mtx);
179	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
180	mtx_unlock(&sc->sc_events_mtx);
181	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
182	mtx_lock(&sc->sc_queue_mtx);
183	wakeup(sc);
184	mtx_unlock(&sc->sc_queue_mtx);
185	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
186		return (0);
187	g_topology_assert();
188	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
189	g_topology_unlock();
190	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
191		mtx_lock(&sc->sc_events_mtx);
192		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
193		    hz * 5);
194	}
195	/* Don't even try to use 'sc' here, because it could already be dead. */
196	g_topology_lock();
197	error = ep->e_error;
198	g_mirror_event_free(ep);
199	return (error);
200}
201
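/*
 * Dequeue and return the next pending event, or NULL if the queue is empty.
 */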
202static struct g_mirror_event *
203g_mirror_event_get(struct g_mirror_softc *sc)
204{
205	struct g_mirror_event *ep;
206
207	mtx_lock(&sc->sc_events_mtx);
208	ep = TAILQ_FIRST(&sc->sc_events);
209	if (ep != NULL)
210		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
211	mtx_unlock(&sc->sc_events_mtx);
212	return (ep);
213}
214
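/*
 * Cancel all pending per-disk events for the given disk.
 */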
215static void
216g_mirror_event_cancel(struct g_mirror_disk *disk)
217{
218	struct g_mirror_softc *sc;
219	struct g_mirror_event *ep, *tmpep;
220
221	g_topology_assert();
222
223	sc = disk->d_softc;
224	mtx_lock(&sc->sc_events_mtx);
225	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
226		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
227			continue;
228		if (ep->e_disk != disk)
229			continue;
230		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
231		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
232			g_mirror_event_free(ep);
233		else {
234			ep->e_error = ECANCELED;
235			wakeup(ep);
236		}
237	}
238	mtx_unlock(&sc->sc_events_mtx);
239}
240
241/*
242 * Return the number of disks in the given state.
243 * If state is equal to -1, count all connected disks.
244 */
245u_int
246g_mirror_ndisks(struct g_mirror_softc *sc, int state)
247{
248	struct g_mirror_disk *disk;
249	u_int n = 0;
250
251	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
252		if (state == -1 || disk->d_state == state)
253			n++;
254	}
255	return (n);
256}
257
258/*
259 * Find a disk in the mirror by its disk ID.
260 */
261static struct g_mirror_disk *
262g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
263{
264	struct g_mirror_disk *disk;
265
266	g_topology_assert();
267
268	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
269		if (disk->d_id == id)
270			return (disk);
271	}
272	return (NULL);
273}
274
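/*
 * Count queued I/O requests which originated from the given consumer.
 */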
275static u_int
276g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
277{
278	struct bio *bp;
279	u_int nreqs = 0;
280
281	mtx_lock(&sc->sc_queue_mtx);
282	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
283		if (bp->bio_from == cp)
284			nreqs++;
285	}
286	mtx_unlock(&sc->sc_queue_mtx);
287	return (nreqs);
288}
289
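/*
 * Detach and destroy the consumer, but only if it has no in-flight or queued
 * I/O requests.
 */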
290static void
291g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
292{
293
294	g_topology_assert();
295
296	cp->private = NULL;
297	if (cp->nstart != cp->nend) {
298		G_MIRROR_DEBUG(2,
299		    "I/O requests for %s exist, can't destroy it now.",
300		    cp->provider->name);
301		return;
302	}
303	if (g_mirror_nrequests(sc, cp) > 0) {
304		G_MIRROR_DEBUG(2,
305		    "I/O requests for %s in queue, can't destroy it now.",
306		    cp->provider->name);
307		return;
308	}
309	G_MIRROR_DEBUG(2, "Consumer %s destroyed.", cp->provider->name);
310	g_detach(cp);
311	g_destroy_consumer(cp);
312}
313
314static int
315g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
316{
317	int error;
318
319	g_topology_assert();
320	KASSERT(disk->d_consumer == NULL,
321	    ("Disk already connected (device %s).", disk->d_softc->sc_name));
322
323	disk->d_consumer = g_new_consumer(disk->d_softc->sc_geom);
324	disk->d_consumer->private = disk;
325	error = g_attach(disk->d_consumer, pp);
326	if (error != 0)
327		return (error);
328	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
329	return (0);
330}
331
332static void
333g_mirror_disconnect_disk(struct g_mirror_disk *disk)
334{
335	struct g_consumer *cp;
336
337	g_topology_assert();
338
339	cp = disk->d_consumer;
340	if (cp == NULL)
341		return;
342	if (cp->provider != NULL) {
343		G_MIRROR_DEBUG(2, "Disk %s disconnected.",
344		    g_mirror_get_diskname(disk));
345		if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) {
346			G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
347			    cp->provider->name, -cp->acr, -cp->acw, -cp->ace,
348			    0);
349			g_access(cp, -cp->acr, -cp->acw, -cp->ace);
350		}
351		g_mirror_kill_consumer(disk->d_softc, cp);
352	} else {
353		g_destroy_consumer(cp);
354	}
355}
356
357/*
358 * Initialize the disk. This means allocate memory, create a consumer and
359 * attach it to the given provider.
360 */
361static struct g_mirror_disk *
362g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
363    struct g_mirror_metadata *md, int *errorp)
364{
365	struct g_mirror_disk *disk;
366	int error;
367
368	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
369	if (disk == NULL) {
370		error = ENOMEM;
371		goto fail;
372	}
373	disk->d_softc = sc;
374	error = g_mirror_connect_disk(disk, pp);
375	if (error != 0)
376		goto fail;
377	disk->d_id = md->md_did;
378	disk->d_state = G_MIRROR_DISK_STATE_NONE;
379	disk->d_priority = md->md_priority;
380	disk->d_delay.sec = 0;
381	disk->d_delay.frac = 0;
382	binuptime(&disk->d_last_used);
383	disk->d_flags = md->md_dflags;
384	disk->d_sync.ds_consumer = NULL;
385	disk->d_sync.ds_offset = md->md_sync_offset;
386	disk->d_sync.ds_offset_done = md->md_sync_offset;
387	disk->d_sync.ds_syncid = md->md_syncid;
388	if (errorp != NULL)
389		*errorp = 0;
390	return (disk);
391fail:
392	if (errorp != NULL)
393		*errorp = error;
394	if (disk != NULL) {
395		g_mirror_disconnect_disk(disk);
396		free(disk, M_MIRROR);
397	}
398	return (NULL);
399}
400
401/*
402 * Free the disk.
403 */
404static void
405g_mirror_free_disk(struct g_mirror_disk *disk)
406{
407
408	g_topology_assert();
409
410	g_mirror_disconnect_disk(disk);
411	free(disk, M_MIRROR);
412}
413
414static void
415g_mirror_destroy_disk(struct g_mirror_disk *disk)
416{
417	struct g_mirror_softc *sc;
418
419	g_topology_assert();
420
421	LIST_REMOVE(disk, d_next);
422	g_mirror_event_cancel(disk);
423	sc = disk->d_softc;
424	if (sc->sc_hint == disk)
425		sc->sc_hint = NULL;
426	switch (disk->d_state) {
427	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
428		g_mirror_sync_stop(disk, 1);
429		/* FALLTHROUGH */
430	case G_MIRROR_DISK_STATE_NEW:
431	case G_MIRROR_DISK_STATE_STALE:
432	case G_MIRROR_DISK_STATE_ACTIVE:
433		g_mirror_free_disk(disk);
434		break;
435	default:
436		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
437		    g_mirror_get_diskname(disk),
438		    g_mirror_disk_state2str(disk->d_state)));
439	}
440}
441
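/*
 * Tear down the whole mirror device: provider, disks, pending events,
 * synchronization geom and mutexes.
 */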
442static void
443g_mirror_destroy_device(struct g_mirror_softc *sc)
444{
445	struct g_mirror_disk *disk;
446	struct g_mirror_event *ep;
447	struct g_geom *gp;
448
449	g_topology_assert();
450
451	gp = sc->sc_geom;
452	if (sc->sc_provider != NULL)
453		g_mirror_destroy_provider(sc);
454	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
455	    disk = LIST_FIRST(&sc->sc_disks)) {
456		g_mirror_destroy_disk(disk);
457	}
458	while ((ep = g_mirror_event_get(sc)) != NULL) {
459		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
460			g_mirror_event_free(ep);
461		else {
462			ep->e_error = ECANCELED;
463			ep->e_flags |= G_MIRROR_EVENT_DONE;
464			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
465			mtx_lock(&sc->sc_events_mtx);
466			wakeup(ep);
467			mtx_unlock(&sc->sc_events_mtx);
468		}
469	}
470	callout_drain(&sc->sc_callout);
471	gp->softc = NULL;
472	uma_zdestroy(sc->sc_sync.ds_zone);
473	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
474	mtx_destroy(&sc->sc_queue_mtx);
475	mtx_destroy(&sc->sc_events_mtx);
476	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
477	g_wither_geom(gp, ENXIO);
478}
479
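/*
 * Called when an underlying provider goes away; schedule disconnection of
 * the affected disk.
 */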
480static void
481g_mirror_orphan(struct g_consumer *cp)
482{
483	struct g_mirror_disk *disk;
484
485	g_topology_assert();
486
487	disk = cp->private;
488	if (disk == NULL)
489		return;
490	disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
491	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
492	    G_MIRROR_EVENT_DONTWAIT);
493}
494
495/*
496 * Return the next active disk on the list.
497 * It is possible that it will be the same disk as the given one.
498 * If there are no active disks on the list, NULL is returned.
499 */
500static __inline struct g_mirror_disk *
501g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
502{
503	struct g_mirror_disk *dp;
504
505	for (dp = LIST_NEXT(disk, d_next); dp != disk;
506	    dp = LIST_NEXT(dp, d_next)) {
507		if (dp == NULL)
508			dp = LIST_FIRST(&sc->sc_disks);
509		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
510			break;
511	}
512	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
513		return (NULL);
514	return (dp);
515}
516
517static struct g_mirror_disk *
518g_mirror_get_disk(struct g_mirror_softc *sc)
519{
520	struct g_mirror_disk *disk;
521
522	if (sc->sc_hint == NULL) {
523		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
524		if (sc->sc_hint == NULL)
525			return (NULL);
526	}
527	disk = sc->sc_hint;
528	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
529		disk = g_mirror_find_next(sc, disk);
530		if (disk == NULL)
531			return (NULL);
532	}
533	sc->sc_hint = g_mirror_find_next(sc, disk);
534	return (disk);
535}
536
537static int
538g_mirror_clear_metadata(struct g_mirror_disk *disk)
539{
540	struct g_mirror_softc *sc;
541	struct g_consumer *cp;
542	off_t offset, length;
543	u_char *sector;
544	int close = 0, error = 0;
545
546	g_topology_assert();
547
548	sc = disk->d_softc;
549	cp = disk->d_consumer;
550	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
551	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
552	length = cp->provider->sectorsize;
553	offset = cp->provider->mediasize - length;
554	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
555	/*
556	 * Open consumer if it wasn't opened and remember to close it.
557	 */
558	if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
559		error = g_access(cp, 0, 1, 1);
560		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
561		    cp->provider->name, 0, 1, 1, error);
562		if (error == 0)
563			close = 1;
564#ifdef	INVARIANTS
565	} else {
566		KASSERT(cp->acw > 0 && cp->ace > 0,
567		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
568		    cp->acr, cp->acw, cp->ace));
569#endif
570	}
571	if (error == 0) {
572		g_topology_unlock();
573		error = g_write_data(cp, offset, sector, length);
574		g_topology_lock();
575	}
576	free(sector, M_MIRROR);
577	if (close) {
578		g_access(cp, 0, -1, -1);
579		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
580		    cp->provider->name, 0, -1, -1, 0);
581	}
582	if (error != 0) {
583		G_MIRROR_DEBUG(0, "Cannot clear metadata on disk %s.",
584		    g_mirror_get_diskname(disk));
585		disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
586		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
587		    G_MIRROR_EVENT_DONTWAIT);
588		return (error);
589	}
590	G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
591	    g_mirror_get_diskname(disk));
592	return (0);
593}
594
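/*
 * Fill in on-disk metadata based on the current device and disk state.
 * If disk is NULL, prepare metadata for a brand new component.
 */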
595void
596g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
597    struct g_mirror_metadata *md)
598{
599
600	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
601	md->md_version = G_MIRROR_VERSION;
602	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
603	md->md_mid = sc->sc_id;
604	md->md_all = sc->sc_ndisks;
605	md->md_slice = sc->sc_slice;
606	md->md_balance = sc->sc_balance;
607	md->md_mediasize = sc->sc_mediasize;
608	md->md_sectorsize = sc->sc_sectorsize;
609	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
610	if (disk == NULL) {
611		md->md_did = arc4random();
612		md->md_priority = 0;
613		md->md_syncid = 0;
614		md->md_dflags = 0;
615		md->md_sync_offset = 0;
616	} else {
617		md->md_did = disk->d_id;
618		md->md_priority = disk->d_priority;
619		md->md_syncid = disk->d_sync.ds_syncid;
620		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
621		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
622			md->md_sync_offset = disk->d_sync.ds_offset_done;
623		else
624			md->md_sync_offset = 0;
625	}
626}
627
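/*
 * Write up-to-date metadata into the last sector of the given disk.
 * On failure the disk is disconnected.
 */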
628void
629g_mirror_update_metadata(struct g_mirror_disk *disk)
630{
631	struct g_mirror_softc *sc;
632	struct g_mirror_metadata md;
633	struct g_consumer *cp;
634	off_t offset, length;
635	u_char *sector;
636	int close = 0, error = 0;
637
638	g_topology_assert();
639
640	sc = disk->d_softc;
641	cp = disk->d_consumer;
642	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
643	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
644	length = cp->provider->sectorsize;
645	offset = cp->provider->mediasize - length;
646	sector = malloc((size_t)length, M_MIRROR, M_WAITOK);
647	/*
648	 * Open consumer if it wasn't opened and remember to close it.
649	 */
650	if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
651		error = g_access(cp, 0, 1, 1);
652		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
653		    cp->provider->name, 0, 1, 1, error);
654		if (error == 0)
655			close = 1;
656#ifdef	INVARIANTS
657	} else {
658		KASSERT(cp->acw > 0 && cp->ace > 0,
659		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
660		    cp->acr, cp->acw, cp->ace));
661#endif
662	}
663	if (error == 0) {
664		g_mirror_fill_metadata(sc, disk, &md);
665		mirror_metadata_encode(&md, sector);
666		g_topology_unlock();
667		error = g_write_data(cp, offset, sector, length);
668		g_topology_lock();
669	}
670	free(sector, M_MIRROR);
671	if (close) {
672		g_access(cp, 0, -1, -1);
673		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
674		    cp->provider->name, 0, -1, -1, 0);
675	}
676	if (error != 0) {
677		G_MIRROR_DEBUG(0,
678		    "Cannot update metadata on disk %s (error=%d).",
679		    g_mirror_get_diskname(disk), error);
680		disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
681		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
682		    G_MIRROR_EVENT_DONTWAIT);
683		return;
684	}
685	G_MIRROR_DEBUG(2, "Metadata on %s updated.",
686	    g_mirror_get_diskname(disk));
687}
688
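/*
 * Increase the synchronization ID and store it in the metadata of all
 * active and synchronizing disks.
 */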
689static void
690g_mirror_bump_syncid(struct g_mirror_softc *sc)
691{
692	struct g_mirror_disk *disk;
693
694	g_topology_assert();
695	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
696	    ("%s called with no active disks (device=%s).", __func__,
697	    sc->sc_name));
698
699	sc->sc_syncid++;
700	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
701		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
702		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
703			disk->d_sync.ds_syncid = sc->sc_syncid;
704			g_mirror_update_metadata(disk);
705		}
706	}
707}
708
709static __inline int
710bintime_cmp(struct bintime *bt1, struct bintime *bt2)
711{
712
713	if (bt1->sec < bt2->sec)
714		return (-1);
715	else if (bt1->sec > bt2->sec)
716		return (1);
717	if (bt1->frac < bt2->frac)
718		return (-1);
719	else if (bt1->frac > bt2->frac)
720		return (1);
721	return (0);
722}
723
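/*
 * Record how long the last request to this disk took; only used by the
 * load balancing algorithm.
 */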
724static void
725g_mirror_update_delay(struct g_mirror_disk *disk, struct bio *bp)
726{
727
728	if (disk->d_softc->sc_balance != G_MIRROR_BALANCE_LOAD)
729		return;
730	binuptime(&disk->d_delay);
731	bintime_sub(&disk->d_delay, &bp->bio_t0);
732}
733
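/*
 * Completion routine for regular requests: mark the bio and queue it for
 * the worker thread.
 */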
734static void
735g_mirror_done(struct bio *bp)
736{
737	struct g_mirror_softc *sc;
738
739	sc = bp->bio_from->geom->softc;
740	bp->bio_flags = BIO_FLAG1;
741	mtx_lock(&sc->sc_queue_mtx);
742	bioq_disksort(&sc->sc_queue, bp);
743	wakeup(sc);
744	mtx_unlock(&sc->sc_queue_mtx);
745}
746
747static void
748g_mirror_regular_request(struct bio *bp)
749{
750	struct g_mirror_softc *sc;
751	struct g_mirror_disk *disk;
752	struct bio *pbp;
753
754	g_topology_assert_not();
755
756	pbp = bp->bio_parent;
757	sc = pbp->bio_to->geom->softc;
758	disk = bp->bio_from->private;
759	if (disk == NULL) {
760		g_topology_lock();
761		g_mirror_kill_consumer(sc, bp->bio_from);
762		g_topology_unlock();
763	} else {
764		g_mirror_update_delay(disk, bp);
765	}
766
767	pbp->bio_inbed++;
768	KASSERT(pbp->bio_inbed <= pbp->bio_children,
769	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
770	    pbp->bio_children));
771	if (bp->bio_error == 0 && pbp->bio_error == 0) {
772		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
773		g_destroy_bio(bp);
774		if (pbp->bio_children == pbp->bio_inbed) {
775			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
776			pbp->bio_completed = pbp->bio_length;
777			g_io_deliver(pbp, pbp->bio_error);
778		}
779		return;
780	} else if (bp->bio_error != 0) {
781		if (pbp->bio_error == 0)
782			pbp->bio_error = bp->bio_error;
783		G_MIRROR_LOGREQ(0, bp, "Request failed (error=%d).",
784		    bp->bio_error);
785		if (disk != NULL) {
786			sc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
787			g_mirror_event_send(disk,
788			    G_MIRROR_DISK_STATE_DISCONNECTED,
789			    G_MIRROR_EVENT_DONTWAIT);
790		}
791		switch (pbp->bio_cmd) {
792		case BIO_DELETE:
793		case BIO_WRITE:
794			pbp->bio_inbed--;
795			pbp->bio_children--;
796			break;
797		}
798	}
799	g_destroy_bio(bp);
800
801	switch (pbp->bio_cmd) {
802	case BIO_READ:
803		if (pbp->bio_children == pbp->bio_inbed) {
804			pbp->bio_error = 0;
805			mtx_lock(&sc->sc_queue_mtx);
806			bioq_disksort(&sc->sc_queue, pbp);
807			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
808			wakeup(sc);
809			mtx_unlock(&sc->sc_queue_mtx);
810		}
811		break;
812	case BIO_DELETE:
813	case BIO_WRITE:
814		if (pbp->bio_children == 0) {
815			/*
816			 * All requests failed.
817			 */
818		} else if (pbp->bio_inbed < pbp->bio_children) {
819			/* Do nothing. */
820			break;
821		} else if (pbp->bio_children == pbp->bio_inbed) {
822			/* Some requests succeeded. */
823			pbp->bio_error = 0;
824			pbp->bio_completed = pbp->bio_length;
825		}
826		g_io_deliver(pbp, pbp->bio_error);
827		break;
828	default:
829		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
830		break;
831	}
832}
833
834static void
835g_mirror_sync_done(struct bio *bp)
836{
837	struct g_mirror_softc *sc;
838
839	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
840	sc = bp->bio_from->geom->softc;
841	bp->bio_flags = BIO_FLAG2;
842	mtx_lock(&sc->sc_queue_mtx);
843	bioq_disksort(&sc->sc_queue, bp);
844	wakeup(sc);
845	mtx_unlock(&sc->sc_queue_mtx);
846}
847
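/*
 * I/O entry point for the mirror provider.  Queue the request for the
 * worker thread.
 */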
848static void
849g_mirror_start(struct bio *bp)
850{
851	struct g_mirror_softc *sc;
852
853	sc = bp->bio_to->geom->softc;
854	/*
855	 * If sc == NULL or there are no valid disks, the provider's error
856	 * should be set and g_mirror_start() should not be called at all.
857	 */
858	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
859	    ("Provider's error should be set (error=%d)(mirror=%s).",
860	    bp->bio_to->error, bp->bio_to->name));
861	G_MIRROR_LOGREQ(3, bp, "Request received.");
862
863	switch (bp->bio_cmd) {
864	case BIO_READ:
865	case BIO_WRITE:
866	case BIO_DELETE:
867		break;
868	case BIO_GETATTR:
869	default:
870		g_io_deliver(bp, EOPNOTSUPP);
871		return;
872	}
873	mtx_lock(&sc->sc_queue_mtx);
874	bioq_disksort(&sc->sc_queue, bp);
875	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
876	wakeup(sc);
877	mtx_unlock(&sc->sc_queue_mtx);
878}
879
880/*
881 * Send one synchronization request.
882 */
883static void
884g_mirror_sync_one(struct g_mirror_disk *disk)
885{
886	struct g_mirror_softc *sc;
887	struct bio *bp;
888
889	sc = disk->d_softc;
890	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
891	    ("Disk %s is not marked for synchronization.",
892	    g_mirror_get_diskname(disk)));
893
894	bp = g_new_bio();
895	if (bp == NULL)
896		return;
897	bp->bio_parent = NULL;
898	bp->bio_cmd = BIO_READ;
899	bp->bio_offset = disk->d_sync.ds_offset;
900	bp->bio_length = MIN(sc->sc_sync.ds_block,
901	    sc->sc_mediasize - bp->bio_offset);
902	bp->bio_flags = 0;
903	bp->bio_done = g_mirror_sync_done;
904	bp->bio_data = uma_zalloc(sc->sc_sync.ds_zone, M_NOWAIT | M_ZERO);
905	if (bp->bio_data == NULL) {
906		g_destroy_bio(bp);
907		return;
908	}
909	disk->d_sync.ds_offset += bp->bio_length;
910	bp->bio_to = sc->sc_provider;
911	G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
912	g_io_request(bp, disk->d_sync.ds_consumer);
913}
914
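/*
 * Handle a completed synchronization bio.  A finished READ from the mirror
 * provider is turned into a WRITE to the disk being synchronized; a finished
 * WRITE advances the synchronization offset.
 */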
915static void
916g_mirror_sync_request(struct bio *bp)
917{
918	struct g_mirror_softc *sc;
919	struct g_mirror_disk *disk;
920
921	sc = bp->bio_from->geom->softc;
922	disk = bp->bio_from->private;
923	if (disk == NULL) {
924		g_topology_lock();
925		g_mirror_kill_consumer(sc, bp->bio_from);
926		g_topology_unlock();
927		uma_zfree(sc->sc_sync.ds_zone, bp->bio_data);
928		g_destroy_bio(bp);
929		return;
930	}
931
932	/*
933	 * Synchronization request.
934	 */
935	switch (bp->bio_cmd) {
936	case BIO_READ:
937	    {
938		struct g_consumer *cp;
939
940		if (bp->bio_error != 0) {
941			G_MIRROR_LOGREQ(0, bp,
942			    "Synchronization request failed (error=%d).",
943			    bp->bio_error);
944			uma_zfree(sc->sc_sync.ds_zone, bp->bio_data);
945			g_destroy_bio(bp);
946			return;
947		}
948		bp->bio_cmd = BIO_WRITE;
949		bp->bio_flags = 0;
950		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
951		cp = disk->d_consumer;
952		KASSERT(cp->acr == 0 && cp->acw == 1 && cp->ace == 1,
953		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
954		    cp->acr, cp->acw, cp->ace));
955		g_io_request(bp, cp);
956		return;
957	    }
958	case BIO_WRITE:
959		uma_zfree(sc->sc_sync.ds_zone, bp->bio_data);
960		if (bp->bio_error != 0) {
961			G_MIRROR_LOGREQ(0, bp,
962			    "Synchronization request failed (error=%d).",
963			    bp->bio_error);
964			g_destroy_bio(bp);
965			disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
966			g_mirror_event_send(disk,
967			    G_MIRROR_DISK_STATE_DISCONNECTED,
968			    G_MIRROR_EVENT_DONTWAIT);
969			return;
970		}
971		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
972		disk->d_sync.ds_offset_done = bp->bio_offset + bp->bio_length;
973		g_destroy_bio(bp);
974		if (disk->d_sync.ds_offset_done ==
975		    sc->sc_provider->mediasize) {
976			/*
977			 * Disk up-to-date, activate it.
978			 */
979			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
980			    G_MIRROR_EVENT_DONTWAIT);
981			return;
982		} else if ((disk->d_sync.ds_offset_done %
983		    (sc->sc_sync.ds_block * 100)) == 0) {
984			/*
985			 * Update offset_done every 100 blocks.
986			 * XXX: This should be configurable.
987			 */
988			g_topology_lock();
989			g_mirror_update_metadata(disk);
990			g_topology_unlock();
991		}
992		return;
993	default:
994		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
995		    bp->bio_cmd, sc->sc_name));
996		break;
997	}
998}
999
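/*
 * Send a read request to the next active disk (round-robin balancing).
 */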
1000static void
1001g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
1002{
1003	struct g_mirror_disk *disk;
1004	struct g_consumer *cp;
1005	struct bio *cbp;
1006
1007	disk = g_mirror_get_disk(sc);
1008	if (disk == NULL) {
1009		if (bp->bio_error == 0)
1010			bp->bio_error = ENXIO;
1011		g_io_deliver(bp, bp->bio_error);
1012		return;
1013	}
1014	cbp = g_clone_bio(bp);
1015	if (cbp == NULL) {
1016		if (bp->bio_error == 0)
1017			bp->bio_error = ENOMEM;
1018		g_io_deliver(bp, bp->bio_error);
1019		return;
1020	}
1021	/*
1022	 * Fill in the component buf structure.
1023	 */
1024	cp = disk->d_consumer;
1025	cbp->bio_done = g_mirror_done;
1026	cbp->bio_to = cp->provider;
1027	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1028	KASSERT(cp->acr > 0 && cp->ace > 0,
1029	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1030	    cp->acw, cp->ace));
1031	g_io_request(cbp, cp);
1032}
1033
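/*
 * Send a read request to the active disk with the smallest recent I/O delay
 * (load balancing).
 */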
1034static void
1035g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
1036{
1037	struct g_mirror_disk *disk, *dp;
1038	struct g_consumer *cp;
1039	struct bio *cbp;
1040	struct bintime curtime;
1041
1042	binuptime(&curtime);
1043	/*
1044	 * Find the disk with the smallest load.
1045	 */
1046	disk = NULL;
1047	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
1048		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1049			continue;
1050		/* If the disk hasn't been used for more than 2 seconds, use it. */
1051		if (curtime.sec - dp->d_last_used.sec >= 2) {
1052			disk = dp;
1053			break;
1054		}
1055		if (disk == NULL ||
1056		    bintime_cmp(&dp->d_delay, &disk->d_delay) < 0) {
1057			disk = dp;
1058		}
1059	}
1060	cbp = g_clone_bio(bp);
1061	if (cbp == NULL) {
1062		if (bp->bio_error == 0)
1063			bp->bio_error = ENOMEM;
1064		g_io_deliver(bp, bp->bio_error);
1065		return;
1066	}
1067	/*
1068	 * Fill in the component buf structure.
1069	 */
1070	cp = disk->d_consumer;
1071	cbp->bio_done = g_mirror_done;
1072	cbp->bio_to = cp->provider;
1073	binuptime(&disk->d_last_used);
1074	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1075	KASSERT(cp->acr > 0 && cp->ace > 0,
1076	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1077	    cp->acw, cp->ace));
1078	g_io_request(cbp, cp);
1079}
1080
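/*
 * Split a large read request across all active disks (split balancing).
 * Requests not bigger than the slice size fall back to round-robin.
 */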
1081static void
1082g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
1083{
1084	struct bio_queue_head queue;
1085	struct g_mirror_disk *disk;
1086	struct g_consumer *cp;
1087	struct bio *cbp;
1088	off_t left, mod, offset, slice;
1089	u_char *data;
1090	u_int ndisks;
1091
1092	if (bp->bio_length <= sc->sc_slice) {
1093		g_mirror_request_round_robin(sc, bp);
1094		return;
1095	}
1096	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
1097	slice = bp->bio_length / ndisks;
1098	mod = slice % sc->sc_provider->sectorsize;
1099	if (mod != 0)
1100		slice += sc->sc_provider->sectorsize - mod;
1101	/*
1102	 * Allocate all bios before sending any request, so we can
1103	 * return ENOMEM in a nice and clean way.
1104	 */
1105	left = bp->bio_length;
1106	offset = bp->bio_offset;
1107	data = bp->bio_data;
1108	bioq_init(&queue);
1109	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1110		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1111			continue;
1112		cbp = g_clone_bio(bp);
1113		if (cbp == NULL) {
1114			for (cbp = bioq_first(&queue); cbp != NULL;
1115			    cbp = bioq_first(&queue)) {
1116				bioq_remove(&queue, cbp);
1117				g_destroy_bio(cbp);
1118			}
1119			if (bp->bio_error == 0)
1120				bp->bio_error = ENOMEM;
1121			g_io_deliver(bp, bp->bio_error);
1122			return;
1123		}
1124		bioq_insert_tail(&queue, cbp);
1125		cbp->bio_done = g_mirror_done;
1126		cbp->bio_caller1 = disk;
1127		cbp->bio_to = disk->d_consumer->provider;
1128		cbp->bio_offset = offset;
1129		cbp->bio_data = data;
1130		cbp->bio_length = MIN(left, slice);
1131		left -= cbp->bio_length;
1132		if (left == 0)
1133			break;
1134		offset += cbp->bio_length;
1135		data += cbp->bio_length;
1136	}
1137	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
1138		bioq_remove(&queue, cbp);
1139		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1140		disk = cbp->bio_caller1;
1141		cbp->bio_caller1 = NULL;
1142		cp = disk->d_consumer;
1143		KASSERT(cp->acr > 0 && cp->ace > 0,
1144		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1145		    cp->acr, cp->acw, cp->ace));
1146		g_io_request(cbp, disk->d_consumer);
1147	}
1148}
1149
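/*
 * Dispatch a regular request: reads go through the configured balance
 * algorithm, writes and deletes are mirrored to every usable disk.
 */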
1150static void
1151g_mirror_register_request(struct bio *bp)
1152{
1153	struct g_mirror_softc *sc;
1154
1155	sc = bp->bio_to->geom->softc;
1156	switch (bp->bio_cmd) {
1157	case BIO_READ:
1158		switch (sc->sc_balance) {
1159		case G_MIRROR_BALANCE_ROUND_ROBIN:
1160			g_mirror_request_round_robin(sc, bp);
1161			break;
1162		case G_MIRROR_BALANCE_LOAD:
1163			g_mirror_request_load(sc, bp);
1164			break;
1165		case G_MIRROR_BALANCE_SPLIT:
1166			g_mirror_request_split(sc, bp);
1167			break;
1168		}
1169		return;
1170	case BIO_WRITE:
1171	case BIO_DELETE:
1172	    {
1173		struct g_mirror_disk *disk;
1174		struct bio_queue_head queue;
1175		struct g_consumer *cp;
1176		struct bio *cbp;
1177
1178		/*
1179		 * Allocate all bios before sending any request, so we can
1180		 * return ENOMEM in a nice and clean way.
1181		 */
1182		bioq_init(&queue);
1183		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1184			switch (disk->d_state) {
1185			case G_MIRROR_DISK_STATE_ACTIVE:
1186				break;
1187			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
1188				if (bp->bio_offset >= disk->d_sync.ds_offset)
1189					continue;
1190				break;
1191			default:
1192				continue;
1193			}
1194			cbp = g_clone_bio(bp);
1195			if (cbp == NULL) {
1196				for (cbp = bioq_first(&queue); cbp != NULL;
1197				    cbp = bioq_first(&queue)) {
1198					bioq_remove(&queue, cbp);
1199					g_destroy_bio(cbp);
1200				}
1201				if (bp->bio_error == 0)
1202					bp->bio_error = ENOMEM;
1203				g_io_deliver(bp, bp->bio_error);
1204				return;
1205			}
1206			bioq_insert_tail(&queue, cbp);
1207		}
1208		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1209			switch (disk->d_state) {
1210			case G_MIRROR_DISK_STATE_ACTIVE:
1211				break;
1212			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
1213				if (bp->bio_offset >= disk->d_sync.ds_offset)
1214					continue;
1215				break;
1216			default:
1217				continue;
1218			}
1219			cbp = bioq_first(&queue);
1220			KASSERT(cbp != NULL, ("NULL cbp! (device %s).",
1221			    sc->sc_name));
1222			bioq_remove(&queue, cbp);
1223			cp = disk->d_consumer;
1224			cbp->bio_done = g_mirror_done;
1225			cbp->bio_to = cp->provider;
1226			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1227			KASSERT(cp->acw > 0 && cp->ace > 0,
1228			    ("Consumer %s not opened (r%dw%de%d).",
1229			    cp->provider->name, cp->acr, cp->acw, cp->ace));
1230			g_io_request(cbp, cp);
1231		}
1232		/*
1233		 * Bump syncid on first write.
1234		 */
1235		if (sc->sc_bump_syncid == G_MIRROR_BUMP_ON_FIRST_WRITE) {
1236			sc->sc_bump_syncid = 0;
1237			g_topology_lock();
1238			g_mirror_bump_syncid(sc);
1239			g_topology_unlock();
1240		}
1241		return;
1242	    }
1243	default:
1244		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1245		    bp->bio_cmd, sc->sc_name));
1246		break;
1247	}
1248}
1249
1250/*
1251 * Worker thread.
1252 */
1253static void
1254g_mirror_worker(void *arg)
1255{
1256	struct g_mirror_softc *sc;
1257	struct g_mirror_disk *disk;
1258	struct g_mirror_event *ep;
1259	struct bio *bp;
1260	u_int nreqs;
1261
1262	sc = arg;
1263	curthread->td_base_pri = PRIBIO;
1264
1265	nreqs = 0;
1266	for (;;) {
1267		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
1268		/*
1269		 * First take a look at events.
1270		 * It is important to handle events before any I/O requests.
1271		 */
1272		ep = g_mirror_event_get(sc);
1273		if (ep != NULL) {
1274			g_topology_lock();
1275			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
1276				/* Update only device status. */
1277				G_MIRROR_DEBUG(3,
1278				    "Running event for device %s.",
1279				    sc->sc_name);
1280				ep->e_error = 0;
1281				g_mirror_update_device(sc, 1);
1282			} else {
1283				/* Update disk status. */
1284				G_MIRROR_DEBUG(3, "Running event for disk %s.",
1285				     g_mirror_get_diskname(ep->e_disk));
1286				ep->e_error = g_mirror_update_disk(ep->e_disk,
1287				    ep->e_state);
1288				if (ep->e_error == 0)
1289					g_mirror_update_device(sc, 0);
1290			}
1291			g_topology_unlock();
1292			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
1293				KASSERT(ep->e_error == 0,
1294				    ("Error cannot be handled."));
1295				g_mirror_event_free(ep);
1296			} else {
1297				ep->e_flags |= G_MIRROR_EVENT_DONE;
1298				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1299				    ep);
1300				mtx_lock(&sc->sc_events_mtx);
1301				wakeup(ep);
1302				mtx_unlock(&sc->sc_events_mtx);
1303			}
1304			if ((sc->sc_flags &
1305			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1306end:
1307				if ((sc->sc_flags &
1308				    G_MIRROR_DEVICE_FLAG_WAIT) != 0) {
1309					G_MIRROR_DEBUG(4, "%s: Waking up %p.",
1310					    __func__, &sc->sc_worker);
1311					wakeup(&sc->sc_worker);
1312					sc->sc_worker = NULL;
1313				} else {
1314					g_topology_lock();
1315					g_mirror_destroy_device(sc);
1316					g_topology_unlock();
1317					free(sc, M_MIRROR);
1318				}
1319				kthread_exit(0);
1320			}
1321			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
1322			continue;
1323		}
1324		/*
1325		 * Now I/O requests.
1326		 */
1327		/* Get first request from the queue. */
1328		mtx_lock(&sc->sc_queue_mtx);
1329		bp = bioq_first(&sc->sc_queue);
1330		if (bp == NULL) {
1331			if ((sc->sc_flags &
1332			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1333				mtx_unlock(&sc->sc_queue_mtx);
1334				goto end;
1335			}
1336		}
1337		if (sc->sc_sync.ds_ndisks > 0 &&
1338		    (bp == NULL || nreqs > g_mirror_reqs_per_sync)) {
1339			mtx_unlock(&sc->sc_queue_mtx);
1340			/*
1341			 * It is time for synchronization...
1342			 */
1343			nreqs = 0;
1344			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1345				if (disk->d_state !=
1346				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
1347					continue;
1348				}
1349				if (disk->d_sync.ds_offset >=
1350				    sc->sc_provider->mediasize) {
1351					continue;
1352				}
1353				if (disk->d_sync.ds_offset >
1354				    disk->d_sync.ds_offset_done) {
1355					continue;
1356				}
1357				g_mirror_sync_one(disk);
1358			}
1359			G_MIRROR_DEBUG(5, "%s: I'm here 2.", __func__);
1360			goto sleep;
1361		}
1362		if (bp == NULL) {
1363			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1", 0);
1364			G_MIRROR_DEBUG(5, "%s: I'm here 3.", __func__);
1365			continue;
1366		}
1367		nreqs++;
1368		bioq_remove(&sc->sc_queue, bp);
1369		mtx_unlock(&sc->sc_queue_mtx);
1370
1371		if ((bp->bio_flags & BIO_FLAG1) != 0) {
1372			g_mirror_regular_request(bp);
1373		} else if ((bp->bio_flags & BIO_FLAG2) != 0) {
1374			u_int timeout, sps;
1375
1376			g_mirror_sync_request(bp);
1377sleep:
1378			sps = atomic_load_acq_int(&g_mirror_syncs_per_sec);
1379			if (sps == 0) {
1380				G_MIRROR_DEBUG(5, "%s: I'm here 5.", __func__);
1381				continue;
1382			}
1383			mtx_lock(&sc->sc_queue_mtx);
1384			if (bioq_first(&sc->sc_queue) != NULL) {
1385				mtx_unlock(&sc->sc_queue_mtx);
1386				G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
1387				continue;
1388			}
1389			timeout = hz / sps;
1390			if (timeout == 0)
1391				timeout = 1;
1392			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w2",
1393			    timeout);
1394		} else {
1395			g_mirror_register_request(bp);
1396		}
1397		G_MIRROR_DEBUG(5, "%s: I'm here 6.", __func__);
1398	}
1399}
1400
1401/*
1402 * Open disk's consumer if needed.
1403 */
1404static void
1405g_mirror_update_access(struct g_mirror_disk *disk)
1406{
1407	struct g_provider *pp;
1408	struct g_consumer *cp;
1409	int acr, acw, ace, cpw, error;
1410
1411	g_topology_assert();
1412
1413	cp = disk->d_consumer;
1414	pp = disk->d_softc->sc_provider;
1415	if (pp == NULL) {
1416		acr = -cp->acr;
1417		acw = -cp->acw;
1418		ace = -cp->ace;
1419	} else {
1420		acr = pp->acr - cp->acr;
1421		acw = pp->acw - cp->acw;
1422		ace = pp->ace - cp->ace;
1423		/* Grab an extra "exclusive" bit. */
1424		if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0)
1425			ace++;
1426	}
1427	if (acr == 0 && acw == 0 && ace == 0)
1428		return;
1429	cpw = cp->acw;
1430	error = g_access(cp, acr, acw, ace);
1431	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, acr,
1432	    acw, ace, error);
1433	if (error != 0) {
1434		disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
1435		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
1436		    G_MIRROR_EVENT_DONTWAIT);
1437		return;
1438	}
1439	if (cpw == 0 && cp->acw > 0) {
1440		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
1441		    g_mirror_get_diskname(disk), disk->d_softc->sc_name);
1442		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1443	} else if (cpw > 0 && cp->acw == 0) {
1444		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
1445		    g_mirror_get_diskname(disk), disk->d_softc->sc_name);
1446		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1447	}
1448}
1449
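/*
 * Start the synchronization process for a disk: open its consumer for
 * writing and create a synchronization consumer attached to the mirror
 * provider.
 */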
1450static void
1451g_mirror_sync_start(struct g_mirror_disk *disk)
1452{
1453	struct g_mirror_softc *sc;
1454	struct g_consumer *cp;
1455	int error;
1456
1457	g_topology_assert();
1458
1459	sc = disk->d_softc;
1460	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1461	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
1462	    sc->sc_state));
1463	cp = disk->d_consumer;
1464	KASSERT(cp->acr == 0 && cp->acw == 0 && cp->ace == 0,
1465	    ("Consumer %s already opened.", cp->provider->name));
1466
1467	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
1468	    g_mirror_get_diskname(disk));
1469	error = g_access(cp, 0, 1, 1);
1470	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, 0, 1,
1471	    1, error);
1472	if (error != 0) {
1473		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
1474		    G_MIRROR_EVENT_DONTWAIT);
1475		return;
1476	}
1477	disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1478	KASSERT(disk->d_sync.ds_consumer == NULL,
1479	    ("Sync consumer already exists (device=%s, disk=%s).",
1480	    sc->sc_name, g_mirror_get_diskname(disk)));
1481	disk->d_sync.ds_consumer = g_new_consumer(sc->sc_sync.ds_geom);
1482	disk->d_sync.ds_consumer->private = disk;
1483	error = g_attach(disk->d_sync.ds_consumer, disk->d_softc->sc_provider);
1484	KASSERT(error == 0, ("Cannot attach to %s (error=%d).",
1485	    disk->d_softc->sc_name, error));
1486	error = g_access(disk->d_sync.ds_consumer, 1, 0, 0);
1487	KASSERT(error == 0, ("Cannot open %s (error=%d).",
1488	    disk->d_softc->sc_name, error));
1489	sc->sc_sync.ds_ndisks++;
1490}
1491
1492/*
1493 * Stop synchronization process.
1494 * type: 0 - synchronization finished
1495 *       1 - synchronization stopped
1496 */
1497static void
1498g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
1499{
1500	struct g_consumer *cp;
1501
1502	g_topology_assert();
1503	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
1504	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
1505	    g_mirror_disk_state2str(disk->d_state)));
1506	if (disk->d_sync.ds_consumer == NULL)
1507		return;
1508
1509	if (type == 0) {
1510		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
1511		    disk->d_softc->sc_name, g_mirror_get_diskname(disk));
1512	} else /* if (type == 1) */ {
1513		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
1514		    disk->d_softc->sc_name, g_mirror_get_diskname(disk));
1515	}
1516	cp = disk->d_sync.ds_consumer;
1517	g_access(cp, -1, 0, 0);
1518	g_mirror_kill_consumer(disk->d_softc, cp);
1519	disk->d_sync.ds_consumer = NULL;
1520	disk->d_softc->sc_sync.ds_ndisks--;
1521	cp = disk->d_consumer;
1522	KASSERT(cp->acr == 0 && cp->acw == 1 && cp->ace == 1,
1523	    ("Consumer %s not opened.", cp->provider->name));
1524	g_access(cp, 0, -1, -1);
1525	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, 0, -1,
1526	    -1, 0);
1527	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1528}
1529
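/*
 * Create the mirror/<name> provider and start synchronization of any disks
 * that need it.
 */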
1530static void
1531g_mirror_launch_provider(struct g_mirror_softc *sc)
1532{
1533	struct g_mirror_disk *disk;
1534	struct g_provider *pp;
1535
1536	g_topology_assert();
1537
1538	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
1539	pp->mediasize = sc->sc_mediasize;
1540	pp->sectorsize = sc->sc_sectorsize;
1541	sc->sc_provider = pp;
1542	g_error_provider(pp, 0);
1543	G_MIRROR_DEBUG(0, "Device %s: provider %s launched.", sc->sc_name,
1544	    pp->name);
1545	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1546		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
1547			g_mirror_sync_start(disk);
1548	}
1549}
1550
1551static void
1552g_mirror_destroy_provider(struct g_mirror_softc *sc)
1553{
1554	struct g_mirror_disk *disk;
1555	struct bio *bp;
1556
1557	g_topology_assert();
1558	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
1559	    sc->sc_name));
1560
1561	g_error_provider(sc->sc_provider, ENXIO);
1562	mtx_lock(&sc->sc_queue_mtx);
1563	while ((bp = bioq_first(&sc->sc_queue)) != NULL) {
1564		bioq_remove(&sc->sc_queue, bp);
1565		g_io_deliver(bp, ENXIO);
1566	}
1567	mtx_unlock(&sc->sc_queue_mtx);
1568	G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name,
1569	    sc->sc_provider->name);
1570	sc->sc_provider->flags |= G_PF_WITHER;
1571	g_orphan_provider(sc->sc_provider, ENXIO);
1572	sc->sc_provider = NULL;
1573	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1574		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
1575			g_mirror_sync_stop(disk, 1);
1576	}
1577}
1578
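/*
 * Callout handler which forces the device to start once the configured
 * timeout has expired.
 */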
1579static void
1580g_mirror_go(void *arg)
1581{
1582	struct g_mirror_softc *sc;
1583
1584	sc = arg;
1585	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
1586	g_mirror_event_send(sc, 0,
1587	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
1588}
1589
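/*
 * Determine the initial state of a disk based on its syncid and flags.
 */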
1590static u_int
1591g_mirror_determine_state(struct g_mirror_disk *disk)
1592{
1593	struct g_mirror_softc *sc;
1594	u_int state;
1595
1596	sc = disk->d_softc;
1597	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
1598		if ((disk->d_flags &
1599		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
1600			/* Disk does not need synchronization. */
1601			state = G_MIRROR_DISK_STATE_ACTIVE;
1602		} else {
1603			if ((sc->sc_flags &
1604			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
1605			    (disk->d_flags &
1606			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
1607				/*
1608				 * We can start synchronization from
1609				 * the stored offset.
1610				 */
1611				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
1612			} else {
1613				state = G_MIRROR_DISK_STATE_STALE;
1614			}
1615		}
1616	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
1617		/*
1618		 * Reset all synchronization data for this disk,
1619		 * because even if it was synchronized, it was
1620		 * synchronized to disks with a different syncid.
1621		 */
1622		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
1623		disk->d_sync.ds_offset = 0;
1624		disk->d_sync.ds_offset_done = 0;
1625		disk->d_sync.ds_syncid = sc->sc_syncid;
1626		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
1627		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
1628			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
1629		} else {
1630			state = G_MIRROR_DISK_STATE_STALE;
1631		}
1632	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
1633		/*
1634		 * Not good, NOT GOOD!
1635		 * It means that the mirror was started on stale disks
1636		 * and a fresher disk has just arrived.
1637		 * If there were any writes, the mirror is now inconsistent.
1638		 * The best choice here is not to touch this disk and to
1639		 * inform the user loudly.
1640		 */
1641		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
1642		    "disk (%s) arrived! It will not be connected to the "
1643		    "running device.", sc->sc_name,
1644		    g_mirror_get_diskname(disk));
1645		g_mirror_destroy_disk(disk);
1646		state = G_MIRROR_DISK_STATE_NONE;
1647		/* Return immediately, because disk was destroyed. */
1648		return (state);
1649	}
1650	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
1651	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
1652	return (state);
1653}
1654
1655/*
1656 * Update device state.
1657 */
1658static void
1659g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force)
1660{
1661	struct g_mirror_disk *disk;
1662	u_int state;
1663
1664	g_topology_assert();
1665
1666	switch (sc->sc_state) {
1667	case G_MIRROR_DEVICE_STATE_STARTING:
1668	    {
1669		struct g_mirror_disk *pdisk;
1670		u_int dirty, ndisks, syncid;
1671
1672		KASSERT(sc->sc_provider == NULL,
1673		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
1674		/*
1675		 * Are we ready? We are, if all disks are connected or
1676		 * if we have any disks and 'force' is true.
1677		 */
1678		if ((force && g_mirror_ndisks(sc, -1) > 0) ||
1679		    sc->sc_ndisks == g_mirror_ndisks(sc, -1)) {
1680			;
1681		} else if (g_mirror_ndisks(sc, -1) == 0) {
1682			/*
1683			 * Disks went down in starting phase, so destroy
1684			 * device.
1685			 */
1686			callout_drain(&sc->sc_callout);
1687			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
1688			return;
1689		} else {
1690			return;
1691		}
1692
1693		/*
1694		 * Activate all disks with the biggest syncid.
1695		 */
1696		if (force) {
1697			/*
1698			 * If called with 'force' true, we're called from the
1699			 * timeout procedure, so don't bother canceling the
1700			 * timeout.
1701			 */
1702			ndisks = 0;
1703			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1704				if ((disk->d_flags &
1705				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
1706					ndisks++;
1707				}
1708			}
1709			if (ndisks == 0) {
1710				int timeout;
1711
1712				/* Still no valid disks, wait some more. */
1713				timeout =
1714				    atomic_load_acq_int(&g_mirror_timeout);
1715				callout_reset(&sc->sc_callout, timeout * hz,
1716				    g_mirror_go, sc);
1717				return;
1718			}
1719		} else {
1720			/* Cancel timeout. */
1721			callout_drain(&sc->sc_callout);
1722		}
1723
1724		/*
1725		 * Find disk with the biggest syncid.
1726		 */
1727		syncid = 0;
1728		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1729			if (disk->d_sync.ds_syncid > syncid)
1730				syncid = disk->d_sync.ds_syncid;
1731		}
1732
1733		/*
1734		 * Here we need to look for dirty disks and if all disks
1735		 * with the biggest syncid are dirty, we have to choose
1736		 * one with the biggest priority and rebuild the rest.
1737		 */
1738		/*
1739		 * Find the number of dirty disks with the biggest syncid.
1740		 * Find the number of disks with the biggest syncid.
1741		 * While here, find a disk with the biggest priority.
1742		 */
1743		dirty = ndisks = 0;
1744		pdisk = NULL;
1745		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1746			if (disk->d_sync.ds_syncid != syncid)
1747				continue;
1748			if ((disk->d_flags &
1749			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
1750				continue;
1751			}
1752			ndisks++;
1753			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
1754				dirty++;
1755				if (pdisk == NULL ||
1756				    pdisk->d_priority < disk->d_priority) {
1757					pdisk = disk;
1758				}
1759			}
1760		}
1761		if (dirty == 0) {
1762			/* No dirty disks at all, great. */
1763		} else if (dirty == ndisks) {
1764			/*
1765			 * Force synchronization for all dirty disks except one
1766			 * with the biggest priority.
1767			 */
1768			KASSERT(pdisk != NULL, ("pdisk == NULL"));
1769			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
1770			    "master disk for synchronization.",
1771			    g_mirror_get_diskname(pdisk), sc->sc_name);
1772			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1773				if (disk->d_sync.ds_syncid != syncid)
1774					continue;
1775				if ((disk->d_flags &
1776				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
1777					continue;
1778				}
1779				KASSERT((disk->d_flags &
1780				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
1781				    ("Disk %s isn't marked as dirty.",
1782				    g_mirror_get_diskname(disk)));
1783				/* Skip the disk with the biggest priority. */
1784				if (disk == pdisk)
1785					continue;
1786				disk->d_sync.ds_syncid = 0;
1787			}
1788		} else if (dirty < ndisks) {
1789			/*
1790			 * Force synchronization for all dirty disks.
1791			 * We have some non-dirty disks.
1792			 */
1793			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1794				if (disk->d_sync.ds_syncid != syncid)
1795					continue;
1796				if ((disk->d_flags &
1797				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
1798					continue;
1799				}
1800				if ((disk->d_flags &
1801				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
1802					continue;
1803				}
1804				disk->d_sync.ds_syncid = 0;
1805			}
1806		}
1807
1808		/* Reset hint. */
1809		sc->sc_hint = NULL;
1810		sc->sc_syncid = syncid;
1811		if (force) {
1812			/* Remember to bump syncid on first write. */
1813			sc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
1814		}
1815		state = G_MIRROR_DEVICE_STATE_RUNNING;
1816		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
1817		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
1818		    g_mirror_device_state2str(state));
1819		sc->sc_state = state;
1820		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1821			state = g_mirror_determine_state(disk);
1822			g_mirror_event_send(disk, state,
1823			    G_MIRROR_EVENT_DONTWAIT);
1824			if (state == G_MIRROR_DISK_STATE_STALE) {
1825				sc->sc_bump_syncid =
1826				    G_MIRROR_BUMP_ON_FIRST_WRITE;
1827			}
1828		}
1829		break;
1830	    }
1831	case G_MIRROR_DEVICE_STATE_RUNNING:
1832		/*
1833		 * Bump syncid here, if we need to do it immediately.
1834		 */
1835		if (sc->sc_bump_syncid == G_MIRROR_BUMP_IMMEDIATELY) {
1836			sc->sc_bump_syncid = 0;
1837			g_mirror_bump_syncid(sc);
1838		}
1839		if (g_mirror_ndisks(sc, -1) == 0) {
1840			/*
1841			 * No disks at all, we need to destroy device.
1842			 */
1843			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
1844		} else if (g_mirror_ndisks(sc,
1845		    G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
1846		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
1847			/*
1848			 * No active disks, destroy provider.
1849			 */
1850			if (sc->sc_provider != NULL)
1851				g_mirror_destroy_provider(sc);
1852		} else if (g_mirror_ndisks(sc,
1853		    G_MIRROR_DISK_STATE_ACTIVE) > 0) {
1854			/*
1855			 * We have active disks, launch provider if it doesn't
1856			 * exist.
1857			 */
1858			if (sc->sc_provider == NULL)
1859				g_mirror_launch_provider(sc);
1860		}
1861		break;
1862	default:
1863		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
1864		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
1865		break;
1866	}
1867}
1868
1869/*
1870 * Update disk state and device state if needed.
1871 */
1872#define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
1873	"Disk %s state changed from %s to %s (device %s).",		\
1874	g_mirror_get_diskname(disk),					\
1875	g_mirror_disk_state2str(disk->d_state),				\
1876	g_mirror_disk_state2str(state), sc->sc_name)
1877static int
1878g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
1879{
1880	struct g_mirror_softc *sc;
1881
1882	g_topology_assert();
1883
1884	sc = disk->d_softc;
1885again:
1886	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
1887	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
1888	    g_mirror_disk_state2str(state));
1889	switch (state) {
1890	case G_MIRROR_DISK_STATE_NEW:
1891		/*
1892		 * Possible scenarios:
1893		 * 1. A new disk arrives.
1894		 */
1895		/* Previous state should be NONE. */
1896		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
1897		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
1898		    g_mirror_disk_state2str(disk->d_state)));
1899		DISK_STATE_CHANGED();
1900
1901		disk->d_state = state;
1902		LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
1903		G_MIRROR_DEBUG(0, "Device %s: provider %s detected.",
1904		    sc->sc_name, g_mirror_get_diskname(disk));
1905		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
1906			break;
1907		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1908		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
1909		    g_mirror_device_state2str(sc->sc_state),
1910		    g_mirror_get_diskname(disk),
1911		    g_mirror_disk_state2str(disk->d_state)));
1912		state = g_mirror_determine_state(disk);
1913		if (state != G_MIRROR_DISK_STATE_NONE)
1914			goto again;
1915		break;
1916	case G_MIRROR_DISK_STATE_ACTIVE:
1917		/*
1918		 * Possible scenarios:
1919		 * 1. New disk does not need synchronization.
1920		 * 2. Synchronization process finished successfully.
1921		 */
1922		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1923		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
1924		    g_mirror_device_state2str(sc->sc_state),
1925		    g_mirror_get_diskname(disk),
1926		    g_mirror_disk_state2str(disk->d_state)));
1927		/* Previous state should be NEW or SYNCHRONIZING. */
1928		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
1929		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
1930		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
1931		    g_mirror_disk_state2str(disk->d_state)));
1932		DISK_STATE_CHANGED();
1933
1934		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
1935			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1936		else if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
1937			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
1938			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
1939			g_mirror_sync_stop(disk, 0);
1940		}
1941		disk->d_state = state;
1942		disk->d_sync.ds_offset = 0;
1943		disk->d_sync.ds_offset_done = 0;
1944		g_mirror_update_access(disk);
1945		g_mirror_update_metadata(disk);
1946		G_MIRROR_DEBUG(0, "Device %s: provider %s activated.",
1947		    sc->sc_name, g_mirror_get_diskname(disk));
1948		break;
1949	case G_MIRROR_DISK_STATE_STALE:
1950		/*
1951		 * Possible scenarios:
1952		 * 1. Stale disk was connected.
1953		 */
1954		/* Previous state should be NEW. */
1955		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
1956		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
1957		    g_mirror_disk_state2str(disk->d_state)));
1958		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1959		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
1960		    g_mirror_device_state2str(sc->sc_state),
1961		    g_mirror_get_diskname(disk),
1962		    g_mirror_disk_state2str(disk->d_state)));
1963		/*
1964		 * The STALE state is only possible if the device is marked
1965		 * NOAUTOSYNC.
1966		 */
1967		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
1968		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
1969		    g_mirror_device_state2str(sc->sc_state),
1970		    g_mirror_get_diskname(disk),
1971		    g_mirror_disk_state2str(disk->d_state)));
1972		DISK_STATE_CHANGED();
1973
1974		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1975		disk->d_state = state;
1976		g_mirror_update_metadata(disk);
1977		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
1978		    sc->sc_name, g_mirror_get_diskname(disk));
1979		break;
1980	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
1981		/*
1982		 * Possible scenarios:
1983		 * 1. Disk which needs synchronization was connected.
1984		 */
1985		/* Previous state should be NEW. */
1986		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
1987		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
1988		    g_mirror_disk_state2str(disk->d_state)));
1989		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1990		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
1991		    g_mirror_device_state2str(sc->sc_state),
1992		    g_mirror_get_diskname(disk),
1993		    g_mirror_disk_state2str(disk->d_state)));
1994		DISK_STATE_CHANGED();
1995
1996		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
1997			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1998		disk->d_state = state;
1999		if (sc->sc_provider != NULL) {
2000			g_mirror_sync_start(disk);
2001			g_mirror_update_metadata(disk);
2002		}
2003		break;
2004	case G_MIRROR_DISK_STATE_DISCONNECTED:
2005		/*
2006		 * Possible scenarios:
2007		 * 1. Device wasn't running yet, but a disk disappeared.
2008		 * 2. Disk was active and disappeared.
2009		 * 3. Disk disappeared during the synchronization process.
2010		 */
2011		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
2012			/*
2013			 * Previous state should be ACTIVE, STALE or
2014			 * SYNCHRONIZING.
2015			 */
2016			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
2017			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
2018			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2019			    ("Wrong disk state (%s, %s).",
2020			    g_mirror_get_diskname(disk),
2021			    g_mirror_disk_state2str(disk->d_state)));
2022		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
2023			/* Previous state should be NEW. */
2024			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2025			    ("Wrong disk state (%s, %s).",
2026			    g_mirror_get_diskname(disk),
2027			    g_mirror_disk_state2str(disk->d_state)));
2028			/*
2029			 * Cancel the scheduled syncid bump if the disk disappeared
2030			 * in the STARTING state.
2031			 */
2032			if (sc->sc_bump_syncid == G_MIRROR_BUMP_ON_FIRST_WRITE)
2033				sc->sc_bump_syncid = 0;
2034#ifdef	INVARIANTS
2035		} else {
2036			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
2037			    sc->sc_name,
2038			    g_mirror_device_state2str(sc->sc_state),
2039			    g_mirror_get_diskname(disk),
2040			    g_mirror_disk_state2str(disk->d_state)));
2041#endif
2042		}
2043		DISK_STATE_CHANGED();
2044		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
2045		    sc->sc_name, g_mirror_get_diskname(disk));
2046
2047		g_mirror_destroy_disk(disk);
2048		break;
2049	case G_MIRROR_DISK_STATE_DESTROY:
2050	    {
2051		int error;
2052
2053		error = g_mirror_clear_metadata(disk);
2054		if (error != 0)
2055			return (error);
2056		DISK_STATE_CHANGED();
2057		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
2058		    sc->sc_name, g_mirror_get_diskname(disk));
2059
2060		g_mirror_destroy_disk(disk);
2061		sc->sc_ndisks--;
2062		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2063			g_mirror_update_metadata(disk);
2064		}
2065		break;
2066	    }
2067	default:
2068		KASSERT(1 == 0, ("Unknown state (%u).", state));
2069		break;
2070	}
2071	return (0);
2072}
2073#undef	DISK_STATE_CHANGED
2074
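/*
 * Read and decode mirror metadata from the last sector of the given
 * consumer's provider.  Read access on the consumer is acquired only for
 * the duration of the read.
 */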
2075static int
2076g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
2077{
2078	struct g_provider *pp;
2079	u_char *buf;
2080	int error;
2081
2082	g_topology_assert();
2083
2084	error = g_access(cp, 1, 0, 0);
2085	if (error != 0)
2086		return (error);
2087	pp = cp->provider;
2088	g_topology_unlock();
2089	/* Metadata are stored in the last sector. */
2090	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
2091	    &error);
2092	g_topology_lock();
2093	if (buf == NULL) {
2094		g_access(cp, -1, 0, 0);
2095		return (error);
2096	}
2097	if (error != 0) {
2098		g_access(cp, -1, 0, 0);
2099		g_free(buf);
2100		return (error);
2101	}
2102	error = g_access(cp, -1, 0, 0);
2103	KASSERT(error == 0, ("Cannot decrease access count for %s.", pp->name));
2104
2105	/* Decode metadata. */
2106	error = mirror_metadata_decode(buf, md);
2107	g_free(buf);
2108	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
2109		return (EINVAL);
2110	if (error != 0) {
2111		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
2112		    cp->provider->name);
2113		return (error);
2114	}
2115
2116	return (0);
2117}
2118
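/*
 * Sanity-check metadata read from a component against the configuration
 * of an already existing device: duplicate disk ID, number of components,
 * slice, balance algorithm, media size, sector size and flags.
 */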
2119static int
2120g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2121    struct g_mirror_metadata *md)
2122{
2123
2124	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2125		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2126		    pp->name, md->md_did);
2127		return (EEXIST);
2128	}
2129	if (md->md_all != sc->sc_ndisks) {
2130		G_MIRROR_DEBUG(1,
2131		    "Invalid '%s' field on disk %s (device %s), skipping.",
2132		    "md_all", pp->name, sc->sc_name);
2133		return (EINVAL);
2134	}
2135	if (md->md_slice != sc->sc_slice) {
2136		G_MIRROR_DEBUG(1,
2137		    "Invalid '%s' field on disk %s (device %s), skipping.",
2138		    "md_slice", pp->name, sc->sc_name);
2139		return (EINVAL);
2140	}
2141	if (md->md_balance != sc->sc_balance) {
2142		G_MIRROR_DEBUG(1,
2143		    "Invalid '%s' field on disk %s (device %s), skipping.",
2144		    "md_balance", pp->name, sc->sc_name);
2145		return (EINVAL);
2146	}
2147	if (md->md_mediasize != sc->sc_mediasize) {
2148		G_MIRROR_DEBUG(1,
2149		    "Invalid '%s' field on disk %s (device %s), skipping.",
2150		    "md_mediasize", pp->name, sc->sc_name);
2151		return (EINVAL);
2152	}
2153	if (sc->sc_mediasize > pp->mediasize) {
2154		G_MIRROR_DEBUG(1,
2155		    "Invalid size of disk %s (device %s), skipping.", pp->name,
2156		    sc->sc_name);
2157		return (EINVAL);
2158	}
2159	if (md->md_sectorsize != sc->sc_sectorsize) {
2160		G_MIRROR_DEBUG(1,
2161		    "Invalid '%s' field on disk %s (device %s), skipping.",
2162		    "md_sectorsize", pp->name, sc->sc_name);
2163		return (EINVAL);
2164	}
2165	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2166		G_MIRROR_DEBUG(1,
2167		    "Invalid sector size of disk %s (device %s), skipping.",
2168		    pp->name, sc->sc_name);
2169		return (EINVAL);
2170	}
2171	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
2172		G_MIRROR_DEBUG(1,
2173		    "Invalid device flags on disk %s (device %s), skipping.",
2174		    pp->name, sc->sc_name);
2175		return (EINVAL);
2176	}
2177	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
2178		G_MIRROR_DEBUG(1,
2179		    "Invalid disk flags on disk %s (device %s), skipping.",
2180		    pp->name, sc->sc_name);
2181		return (EINVAL);
2182	}
2183	return (0);
2184}
2185
2186static int
2187g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
2188    struct g_mirror_metadata *md)
2189{
2190	struct g_mirror_disk *disk;
2191	int error;
2192
2193	g_topology_assert();
2194	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
2195
2196	error = g_mirror_check_metadata(sc, pp, md);
2197	if (error != 0)
2198		return (error);
2199	disk = g_mirror_init_disk(sc, pp, md, &error);
2200	if (disk == NULL)
2201		return (error);
2202	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
2203	    G_MIRROR_EVENT_WAIT);
2204	return (error);
2205}
2206
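/*
 * Access method for the mirror provider.  Access counts are passed down
 * to every ACTIVE component; an additional "exclusive" bit is held on the
 * components for as long as the provider is open.  Components are marked
 * dirty on the first write open and clean again on the last write close.
 */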
2207static int
2208g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
2209{
2210	struct g_mirror_softc *sc;
2211	struct g_mirror_disk *disk;
2212	int dcr, dcw, dce, err, error;
2213
2214	g_topology_assert();
2215	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
2216	    acw, ace);
2217
2218	dcr = pp->acr + acr;
2219	dcw = pp->acw + acw;
2220	dce = pp->ace + ace;
2221
2222	/* On first open, grab an extra "exclusive" bit */
2223	if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)
2224		ace++;
2225	/* ... and let go of it on last close */
2226	if (dcr == 0 && dcw == 0 && dce == 0)
2227		ace--;
2228
2229	sc = pp->geom->softc;
2230	if (sc == NULL || LIST_EMPTY(&sc->sc_disks)) {
2231		if (acr <= 0 && acw <= 0 && ace <= 0)
2232			return (0);
2233		else
2234			return (ENXIO);
2235	}
2236	error = ENXIO;
2237	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2238		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
2239			continue;
2240		err = g_access(disk->d_consumer, acr, acw, ace);
2241		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
2242		    g_mirror_get_diskname(disk), acr, acw, ace, err);
2243		if (err == 0) {
2244			/*
2245			 * Mark disk as dirty on open and unmark on close.
2246			 */
2247			if (pp->acw == 0 && dcw > 0) {
2248				G_MIRROR_DEBUG(1,
2249				    "Disk %s (device %s) marked as dirty.",
2250				    g_mirror_get_diskname(disk), sc->sc_name);
2251				disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2252				g_mirror_update_metadata(disk);
2253			} else if (pp->acw > 0 && dcw == 0) {
2254				G_MIRROR_DEBUG(1,
2255				    "Disk %s (device %s) marked as clean.",
2256				    g_mirror_get_diskname(disk), sc->sc_name);
2257				disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2258				g_mirror_update_metadata(disk);
2259			}
2260			error = 0;
2261		} else {
2262			sc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
2263			g_mirror_event_send(disk,
2264			    G_MIRROR_DISK_STATE_DISCONNECTED,
2265			    G_MIRROR_EVENT_DONTWAIT);
2266		}
2267	}
2268	return (error);
2269}
2270
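/*
 * Create a new mirror device from the given metadata: the main geom which
 * serves regular I/O, a separate "<name>.sync" geom used for
 * synchronization requests, a worker thread and a timeout callout which
 * fires g_mirror_go() after the configured number of seconds.
 */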
2271static struct g_geom *
2272g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md)
2273{
2274	struct g_mirror_softc *sc;
2275	struct g_geom *gp;
2276	int error, timeout;
2277
2278	g_topology_assert();
2279	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
2280	    md->md_mid);
2281
2282	/* At least one disk is required. */
2283	if (md->md_all < 1)
2284		return (NULL);
2285	/*
2286	 * Action geom.
2287	 */
2288	gp = g_new_geomf(mp, "%s", md->md_name);
2289	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
2290	gp->start = g_mirror_start;
2291	gp->spoiled = g_mirror_orphan;
2292	gp->orphan = g_mirror_orphan;
2293	gp->access = g_mirror_access;
2294	gp->dumpconf = g_mirror_dumpconf;
2295
2296	sc->sc_id = md->md_mid;
2297	sc->sc_slice = md->md_slice;
2298	sc->sc_balance = md->md_balance;
2299	sc->sc_mediasize = md->md_mediasize;
2300	sc->sc_sectorsize = md->md_sectorsize;
2301	sc->sc_ndisks = md->md_all;
2302	sc->sc_flags = md->md_mflags;
2303	sc->sc_bump_syncid = 0;
2304	bioq_init(&sc->sc_queue);
2305	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
2306	LIST_INIT(&sc->sc_disks);
2307	TAILQ_INIT(&sc->sc_events);
2308	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
2309	callout_init(&sc->sc_callout, CALLOUT_MPSAFE);
2310	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
2311	gp->softc = sc;
2312	sc->sc_geom = gp;
2313	sc->sc_provider = NULL;
2314	/*
2315	 * Synchronization geom.
2316	 */
2317	gp = g_new_geomf(mp, "%s.sync", md->md_name);
2318	gp->softc = sc;
2319	gp->spoiled = g_mirror_orphan;
2320	gp->orphan = g_mirror_orphan;
2321	sc->sc_sync.ds_geom = gp;
2322	sc->sc_sync.ds_block = atomic_load_acq_int(&g_mirror_sync_block_size);
2323	sc->sc_sync.ds_ndisks = 0;
2324	sc->sc_sync.ds_zone = uma_zcreate("gmirror:sync", sc->sc_sync.ds_block,
2325	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2326	error = kthread_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
2327	    "g_mirror %s", md->md_name);
2328	if (error != 0) {
2329		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
2330		    sc->sc_name);
2331		uma_zdestroy(sc->sc_sync.ds_zone);
2332		g_destroy_geom(sc->sc_sync.ds_geom);
2333		mtx_destroy(&sc->sc_events_mtx);
2334		mtx_destroy(&sc->sc_queue_mtx);
2335		g_destroy_geom(sc->sc_geom);
2336		free(sc, M_MIRROR);
2337		return (NULL);
2338	}
2339
2340	G_MIRROR_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id);
2341
2342	/*
2343	 * Schedule the startup timeout; it fires g_mirror_go().
2344	 */
2345	timeout = atomic_load_acq_int(&g_mirror_timeout);
2346	callout_reset(&sc->sc_callout, timeout * hz, g_mirror_go, sc);
2347	return (sc->sc_geom);
2348}
2349
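/*
 * Destroy the given device.  Unless "force" is set, an open provider makes
 * the request fail with EBUSY.  The worker thread is asked to finish via
 * the DESTROY and WAIT flags and we wait for it to exit before the device
 * is torn down.
 */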
2350int
2351g_mirror_destroy(struct g_mirror_softc *sc, boolean_t force)
2352{
2353	struct g_provider *pp;
2354
2355	g_topology_assert();
2356
2357	if (sc == NULL)
2358		return (ENXIO);
2359	pp = sc->sc_provider;
2360	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
2361		if (force) {
2362			G_MIRROR_DEBUG(0, "Device %s is still open, so it "
2363			    "cannot be removed cleanly.", pp->name);
2364		} else {
2365			G_MIRROR_DEBUG(1,
2366			    "Device %s is still open (r%dw%de%d).", pp->name,
2367			    pp->acr, pp->acw, pp->ace);
2368			return (EBUSY);
2369		}
2370	}
2371
2372	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2373	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_WAIT;
2374	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
2375	mtx_lock(&sc->sc_queue_mtx);
2376	wakeup(sc);
2377	mtx_unlock(&sc->sc_queue_mtx);
2378	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
2379	while (sc->sc_worker != NULL)
2380		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
2381	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
2382	g_mirror_destroy_device(sc);
2383	free(sc, M_MIRROR);
2384	return (0);
2385}
2386
2387static void
2388g_mirror_taste_orphan(struct g_consumer *cp)
2389{
2390
2391	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
2392	    cp->provider->name));
2393}
2394
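/*
 * Taste method: read metadata from the offered provider through a
 * temporary geom and consumer, then hand the provider to an existing
 * mirror device with a matching name and ID, creating the device first
 * when no such device exists yet.
 */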
2395static struct g_geom *
2396g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
2397{
2398	struct g_mirror_metadata md;
2399	struct g_mirror_softc *sc;
2400	struct g_consumer *cp;
2401	struct g_geom *gp;
2402	int error;
2403
2404	g_topology_assert();
2405	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
2406	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
2407
2408	gp = g_new_geomf(mp, "mirror:taste");
2409	/*
2410	 * This orphan function should never be called.
2411	 */
2412	gp->orphan = g_mirror_taste_orphan;
2413	cp = g_new_consumer(gp);
2414	g_attach(cp, pp);
2415	error = g_mirror_read_metadata(cp, &md);
2416	g_detach(cp);
2417	g_destroy_consumer(cp);
2418	g_destroy_geom(gp);
2419	if (error != 0)
2420		return (NULL);
2421	gp = NULL;
2422
2423	if (md.md_version > G_MIRROR_VERSION) {
2424		printf("geom_mirror.ko module is too old to handle %s.\n",
2425		    pp->name);
2426		return (NULL);
2427	}
2428	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
2429		G_MIRROR_DEBUG(0,
2430		    "Device %s: provider %s marked as inactive, skipping.",
2431		    md.md_name, pp->name);
2432		return (NULL);
2433	}
2434	if (g_mirror_debug >= 2)
2435		mirror_metadata_dump(&md);
2436
2437	/*
2438	 * Let's check if the device already exists.
2439	 */
2440	LIST_FOREACH(gp, &mp->geom, geom) {
2441		sc = gp->softc;
2442		if (sc == NULL)
2443			continue;
2444		if (sc->sc_sync.ds_geom == gp)
2445			continue;
2446		if (strcmp(md.md_name, sc->sc_name) != 0)
2447			continue;
2448		if (md.md_mid != sc->sc_id) {
2449			G_MIRROR_DEBUG(0, "Device %s already configured.",
2450			    sc->sc_name);
2451			return (NULL);
2452		}
2453		break;
2454	}
2455	if (gp == NULL) {
2456		gp = g_mirror_create(mp, &md);
2457		if (gp == NULL) {
2458			G_MIRROR_DEBUG(0, "Cannot create device %s.mirror",
2459			    md.md_name);
2460			return (NULL);
2461		}
2462		sc = gp->softc;
2463	}
2464	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
2465	error = g_mirror_add_disk(sc, pp, &md);
2466	if (error != 0) {
2467		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
2468		    pp->name, gp->name, error);
2469		if (LIST_EMPTY(&sc->sc_disks))
2470			g_mirror_destroy(sc, 1);
2471		return (NULL);
2472	}
2473	return (gp);
2474}
2475
2476static int
2477g_mirror_destroy_geom(struct gctl_req *req __unused,
2478    struct g_class *mp __unused, struct g_geom *gp)
2479{
2480
2481	return (g_mirror_destroy(gp->softc, 0));
2482}
2483
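/*
 * Dump device and per-component state into the confxml tree.  For a
 * component this produces, for example:
 *
 *   <ID>123456</ID>
 *   <SyncID>2</SyncID>
 *   <Flags>DIRTY</Flags>
 *   <Priority>0</Priority>
 *   <State>ACTIVE</State>
 *
 * (values are illustrative); for the geom itself the device ID, syncid,
 * flags, slice, balance algorithm and number of components are printed.
 */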
2484static void
2485g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
2486    struct g_consumer *cp, struct g_provider *pp)
2487{
2488	struct g_mirror_softc *sc;
2489
2490	g_topology_assert();
2491
2492	sc = gp->softc;
2493	if (sc == NULL)
2494		return;
2495	/* Skip synchronization geom. */
2496	if (gp == sc->sc_sync.ds_geom)
2497		return;
2498	if (pp != NULL) {
2499		/* Nothing here. */
2500	} else if (cp != NULL) {
2501		struct g_mirror_disk *disk;
2502
2503		disk = cp->private;
2504		if (disk == NULL)
2505			return;
2506		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
2507		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2508			sbuf_printf(sb, "%s<Synchronized>", indent);
2509			if (disk->d_sync.ds_offset_done == 0)
2510				sbuf_printf(sb, "0%%");
2511			else {
2512				sbuf_printf(sb, "%u%%",
2513				    (u_int)((disk->d_sync.ds_offset_done * 100) /
2514				    sc->sc_provider->mediasize));
2515			}
2516			sbuf_printf(sb, "</Synchronized>\n");
2517		}
2518		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
2519		    disk->d_sync.ds_syncid);
2520		sbuf_printf(sb, "%s<Flags>", indent);
2521		if (disk->d_flags == 0)
2522			sbuf_printf(sb, "NONE");
2523		else {
2524			int first = 1;
2525
2526			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2527				if (!first)
2528					sbuf_printf(sb, ", ");
2529				else
2530					first = 0;
2531				sbuf_printf(sb, "DIRTY");
2532			}
2533			if ((disk->d_flags &
2534			    G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
2535				if (!first)
2536					sbuf_printf(sb, ", ");
2537				else
2538					first = 0;
2539				sbuf_printf(sb, "INACTIVE");
2540			}
2541			if ((disk->d_flags &
2542			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2543				if (!first)
2544					sbuf_printf(sb, ", ");
2545				else
2546					first = 0;
2547				sbuf_printf(sb, "SYNCHRONIZING");
2548			}
2549			if ((disk->d_flags &
2550			    G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2551				if (!first)
2552					sbuf_printf(sb, ", ");
2553				else
2554					first = 0;
2555				sbuf_printf(sb, "FORCE_SYNC");
2556			}
2557		}
2558		sbuf_printf(sb, "</Flags>\n");
2559		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
2560		    disk->d_priority);
2561		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
2562		    g_mirror_disk_state2str(disk->d_state));
2563	} else {
2564		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
2565		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
2566		sbuf_printf(sb, "%s<Flags>", indent);
2567		if (sc->sc_flags == 0)
2568			sbuf_printf(sb, "NONE");
2569		else {
2570			int first = 1;
2571
2572			if ((sc->sc_flags &
2573			    G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0) {
2574				if (!first)
2575					sbuf_printf(sb, ", ");
2576				else
2577					first = 0;
2578				sbuf_printf(sb, "NOAUTOSYNC");
2579			}
2580		}
2581		sbuf_printf(sb, "</Flags>\n");
2582		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
2583		    (u_int)sc->sc_slice);
2584		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
2585		    balance_name(sc->sc_balance));
2586		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
2587		    sc->sc_ndisks);
2588	}
2589}
2590
2591DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
2592