g_mirror.c revision 309205
/*-
 * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/geom/mirror/g_mirror.c 309205 2016-11-27 05:58:47Z mav $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/bio.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/eventhandler.h>
#include <vm/uma.h>
#include <geom/geom.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/sched.h>
#include <geom/mirror/g_mirror.h>

FEATURE(geom_mirror, "GEOM mirroring support");

static MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data");

SYSCTL_DECL(_kern_geom);
static SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0,
    "GEOM_MIRROR stuff");
u_int g_mirror_debug = 0;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RWTUN, &g_mirror_debug, 0,
    "Debug level");
static u_int g_mirror_timeout = 4;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RWTUN, &g_mirror_timeout,
    0, "Time to wait on all mirror components");
static u_int g_mirror_idletime = 5;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RWTUN,
    &g_mirror_idletime, 0, "Mark components as clean when idling");
static u_int g_mirror_disconnect_on_failure = 1;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, disconnect_on_failure, CTLFLAG_RWTUN,
    &g_mirror_disconnect_on_failure, 0, "Disconnect component on I/O failure.");
static u_int g_mirror_syncreqs = 2;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_requests, CTLFLAG_RDTUN,
    &g_mirror_syncreqs, 0, "Parallel synchronization I/O requests.");

#define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
} while (0)

static eventhandler_tag g_mirror_post_sync = NULL;
static int g_mirror_shutdown = 0;

static g_ctl_destroy_geom_t g_mirror_destroy_geom;
static g_taste_t g_mirror_taste;
static g_init_t g_mirror_init;
static g_fini_t g_mirror_fini;
static g_provgone_t g_mirror_providergone;
static g_resize_t g_mirror_resize;

struct g_class g_mirror_class = {
	.name = G_MIRROR_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_mirror_config,
	.taste = g_mirror_taste,
	.destroy_geom = g_mirror_destroy_geom,
	.init = g_mirror_init,
	.fini = g_mirror_fini,
	.providergone = g_mirror_providergone,
	.resize = g_mirror_resize
};


static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
static void g_mirror_update_device(struct g_mirror_softc *sc, bool force);
static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
static void g_mirror_register_request(struct bio *bp);
static void g_mirror_sync_release(struct g_mirror_softc *sc);


static const char *
g_mirror_disk_state2str(int state)
{

	switch (state) {
	case G_MIRROR_DISK_STATE_NONE:
		return ("NONE");
	case G_MIRROR_DISK_STATE_NEW:
		return ("NEW");
	case G_MIRROR_DISK_STATE_ACTIVE:
		return ("ACTIVE");
	case G_MIRROR_DISK_STATE_STALE:
		return ("STALE");
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		return ("SYNCHRONIZING");
	case G_MIRROR_DISK_STATE_DISCONNECTED:
		return ("DISCONNECTED");
	case G_MIRROR_DISK_STATE_DESTROY:
		return ("DESTROY");
	default:
		return ("INVALID");
	}
}

static const char *
g_mirror_device_state2str(int state)
{

	switch (state) {
	case G_MIRROR_DEVICE_STATE_STARTING:
		return ("STARTING");
	case G_MIRROR_DEVICE_STATE_RUNNING:
		return ("RUNNING");
	default:
		return ("INVALID");
	}
}

static const char *
g_mirror_get_diskname(struct g_mirror_disk *disk)
{

	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
		return ("[unknown]");
	return (disk->d_name);
}
/*
 * --- Event handling functions ---
 * Events in geom_mirror are used to update disk and device state from a
 * single thread, which simplifies locking.
 */
static void
g_mirror_event_free(struct g_mirror_event *ep)
{

	free(ep, M_MIRROR);
}

int
g_mirror_event_send(void *arg, int state, int flags)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	int error;

	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
		disk = NULL;
		sc = arg;
	} else {
		disk = arg;
		sc = disk->d_softc;
	}
	ep->e_disk = disk;
	ep->e_state = state;
	ep->e_flags = flags;
	ep->e_error = 0;
	mtx_lock(&sc->sc_events_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	mtx_lock(&sc->sc_queue_mtx);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
		return (0);
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
	sx_xunlock(&sc->sc_lock);
	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
		mtx_lock(&sc->sc_events_mtx);
		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
		    hz * 5);
	}
	error = ep->e_error;
	g_mirror_event_free(ep);
	sx_xlock(&sc->sc_lock);
	return (error);
}
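
/*
 * Example (illustrative only): a caller that must not sleep queues a state
 * change and returns immediately:
 *
 *	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
 *	    G_MIRROR_EVENT_DONTWAIT);
 *
 * Without G_MIRROR_EVENT_DONTWAIT the caller drops sc_lock and sleeps until
 * the worker thread marks the event G_MIRROR_EVENT_DONE and wakes it up, at
 * which point e_error carries the result.
 */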

static struct g_mirror_event *
g_mirror_event_get(struct g_mirror_softc *sc)
{
	struct g_mirror_event *ep;

	mtx_lock(&sc->sc_events_mtx);
	ep = TAILQ_FIRST(&sc->sc_events);
	mtx_unlock(&sc->sc_events_mtx);
	return (ep);
}

static void
g_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep)
{

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_REMOVE(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
}

static void
g_mirror_event_cancel(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_mirror_event *ep, *tmpep;

	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
			continue;
		if (ep->e_disk != disk)
			continue;
		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			wakeup(ep);
		}
	}
	mtx_unlock(&sc->sc_events_mtx);
}

/*
 * Return the number of disks in the given state.
 * If the state is -1, count all connected disks.
 */
u_int
g_mirror_ndisks(struct g_mirror_softc *sc, int state)
{
	struct g_mirror_disk *disk;
	u_int n = 0;

	sx_assert(&sc->sc_lock, SX_LOCKED);

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (state == -1 || disk->d_state == state)
			n++;
	}
	return (n);
}

/*
 * Find a disk in mirror by its disk ID.
 */
static struct g_mirror_disk *
g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
{
	struct g_mirror_disk *disk;

	sx_assert(&sc->sc_lock, SX_XLOCKED);

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_id == id)
			return (disk);
	}
	return (NULL);
}

static u_int
g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
{
	struct bio *bp;
	u_int nreqs = 0;

	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
		if (bp->bio_from == cp)
			nreqs++;
	}
	mtx_unlock(&sc->sc_queue_mtx);
	return (nreqs);
}

static int
g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	if (cp->index > 0) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s exist, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	if (g_mirror_nrequests(sc, cp) > 0) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s in queue, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	return (0);
}

static void
g_mirror_destroy_consumer(void *arg, int flags __unused)
{
	struct g_consumer *cp;

	g_topology_assert();

	cp = arg;
	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}

static void
g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
{
	struct g_provider *pp;
	int retaste_wait;

	g_topology_assert();

	cp->private = NULL;
	if (g_mirror_is_busy(sc, cp))
		return;
	pp = cp->provider;
	retaste_wait = 0;
	if (cp->acw == 1) {
		if ((pp->geom->flags & G_GEOM_WITHER) == 0)
			retaste_wait = 1;
	}
	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
	    -cp->acw, -cp->ace, 0);
	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
	if (retaste_wait) {
		/*
		 * After the retaste event has been sent (inside g_access()),
		 * we can send the event to detach and destroy the consumer.
		 * A class which has a consumer attached to the given provider
		 * will not receive a retaste event for that provider.
		 * This is how retaste events are ignored when consumers opened
		 * for writing are closed: the consumer is detached and
		 * destroyed after the retaste event is sent.
		 */
		g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL);
		return;
	}
	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}

static int
g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
{
	struct g_consumer *cp;
	int error;

	g_topology_assert_not();
	KASSERT(disk->d_consumer == NULL,
	    ("Disk already connected (device %s).", disk->d_softc->sc_name));

	g_topology_lock();
	cp = g_new_consumer(disk->d_softc->sc_geom);
	cp->flags |= G_CF_DIRECT_RECEIVE;
	error = g_attach(cp, pp);
	if (error != 0) {
		g_destroy_consumer(cp);
		g_topology_unlock();
		return (error);
	}
	error = g_access(cp, 1, 1, 1);
	if (error != 0) {
		g_detach(cp);
		g_destroy_consumer(cp);
		g_topology_unlock();
		G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).",
		    pp->name, error);
		return (error);
	}
	g_topology_unlock();
	disk->d_consumer = cp;
	disk->d_consumer->private = disk;
	disk->d_consumer->index = 0;

	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
	return (0);
}

static void
g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	g_topology_assert();

	if (cp == NULL)
		return;
	if (cp->provider != NULL)
		g_mirror_kill_consumer(sc, cp);
	else
		g_destroy_consumer(cp);
}

/*
 * Initialize a disk: allocate memory, create a consumer, attach it to the
 * provider, and open access (r1w1e1) to it.
 */
static struct g_mirror_disk *
g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
    struct g_mirror_metadata *md, int *errorp)
{
	struct g_mirror_disk *disk;
	int i, error;

	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
	if (disk == NULL) {
		error = ENOMEM;
		goto fail;
	}
	disk->d_softc = sc;
	error = g_mirror_connect_disk(disk, pp);
	if (error != 0)
		goto fail;
	disk->d_id = md->md_did;
	disk->d_state = G_MIRROR_DISK_STATE_NONE;
	disk->d_priority = md->md_priority;
	disk->d_flags = md->md_dflags;
	error = g_getattr("GEOM::candelete", disk->d_consumer, &i);
	if (error == 0 && i != 0)
		disk->d_flags |= G_MIRROR_DISK_FLAG_CANDELETE;
	if (md->md_provider[0] != '\0')
		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
	disk->d_sync.ds_consumer = NULL;
	disk->d_sync.ds_offset = md->md_sync_offset;
	disk->d_sync.ds_offset_done = md->md_sync_offset;
	disk->d_genid = md->md_genid;
	disk->d_sync.ds_syncid = md->md_syncid;
	if (errorp != NULL)
		*errorp = 0;
	return (disk);
fail:
	if (errorp != NULL)
		*errorp = error;
	if (disk != NULL)
		free(disk, M_MIRROR);
	return (NULL);
}

static void
g_mirror_destroy_disk(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	LIST_REMOVE(disk, d_next);
	g_mirror_event_cancel(disk);
	if (sc->sc_hint == disk)
		sc->sc_hint = NULL;
	switch (disk->d_state) {
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		g_mirror_sync_stop(disk, 1);
		/* FALLTHROUGH */
	case G_MIRROR_DISK_STATE_NEW:
	case G_MIRROR_DISK_STATE_STALE:
	case G_MIRROR_DISK_STATE_ACTIVE:
		g_topology_lock();
		g_mirror_disconnect_consumer(sc, disk->d_consumer);
		g_topology_unlock();
		free(disk, M_MIRROR);
		break;
	default:
		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
	}
}

static void
g_mirror_free_device(struct g_mirror_softc *sc)
{

	mtx_destroy(&sc->sc_queue_mtx);
	mtx_destroy(&sc->sc_events_mtx);
	mtx_destroy(&sc->sc_done_mtx);
	sx_destroy(&sc->sc_lock);
	free(sc, M_MIRROR);
}

static void
g_mirror_providergone(struct g_provider *pp)
{
	struct g_mirror_softc *sc = pp->private;

	if ((--sc->sc_refcnt) == 0)
		g_mirror_free_device(sc);
}

static void
g_mirror_destroy_device(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	struct g_geom *gp;
	struct g_consumer *cp, *tmpcp;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	gp = sc->sc_geom;
	if (sc->sc_provider != NULL)
		g_mirror_destroy_provider(sc);
	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
	    disk = LIST_FIRST(&sc->sc_disks)) {
		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
		g_mirror_destroy_disk(disk);
	}
	while ((ep = g_mirror_event_get(sc)) != NULL) {
		g_mirror_event_remove(sc, ep);
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			ep->e_flags |= G_MIRROR_EVENT_DONE;
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
			mtx_lock(&sc->sc_events_mtx);
			wakeup(ep);
			mtx_unlock(&sc->sc_events_mtx);
		}
	}
	callout_drain(&sc->sc_callout);

	g_topology_lock();
	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
		g_mirror_disconnect_consumer(sc, cp);
	}
	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
	g_wither_geom(gp, ENXIO);
	sx_xunlock(&sc->sc_lock);
	if ((--sc->sc_refcnt) == 0)
		g_mirror_free_device(sc);
	g_topology_unlock();
}

static void
g_mirror_orphan(struct g_consumer *cp)
{
	struct g_mirror_disk *disk;

	g_topology_assert();

	disk = cp->private;
	if (disk == NULL)
		return;
	disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
	    G_MIRROR_EVENT_DONTWAIT);
}

/*
 * Return the next active disk on the list.
 * It is possible that it will be the same disk as the given one.
 * If there are no active disks on the list, NULL is returned.
 */
static __inline struct g_mirror_disk *
g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
{
	struct g_mirror_disk *dp;

	for (dp = LIST_NEXT(disk, d_next); dp != disk;
	    dp = LIST_NEXT(dp, d_next)) {
		if (dp == NULL)
			dp = LIST_FIRST(&sc->sc_disks);
		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
			break;
	}
	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
		return (NULL);
	return (dp);
}

static struct g_mirror_disk *
g_mirror_get_disk(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	if (sc->sc_hint == NULL) {
		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
		if (sc->sc_hint == NULL)
			return (NULL);
	}
	disk = sc->sc_hint;
	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
		disk = g_mirror_find_next(sc, disk);
		if (disk == NULL)
			return (NULL);
	}
	sc->sc_hint = g_mirror_find_next(sc, disk);
	return (disk);
}

static int
g_mirror_write_metadata(struct g_mirror_disk *disk,
    struct g_mirror_metadata *md)
{
	struct g_mirror_softc *sc;
	struct g_consumer *cp;
	off_t offset, length;
	u_char *sector;
	int error = 0;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	cp = disk->d_consumer;
	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	length = cp->provider->sectorsize;
	offset = cp->provider->mediasize - length;
	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
	if (md != NULL &&
	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0) {
		/*
		 * Handle the case when the size of the parent provider
		 * has been reduced.
		 */
		if (offset < md->md_mediasize)
			error = ENOSPC;
		else
			mirror_metadata_encode(md, sector);
	}
	if (error == 0)
		error = g_write_data(cp, offset, sector, length);
	free(sector, M_MIRROR);
	if (error != 0) {
		if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
			disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
			G_MIRROR_DEBUG(0, "Cannot write metadata on %s "
			    "(device=%s, error=%d).",
			    g_mirror_get_diskname(disk), sc->sc_name, error);
		} else {
			G_MIRROR_DEBUG(1, "Cannot write metadata on %s "
			    "(device=%s, error=%d).",
			    g_mirror_get_diskname(disk), sc->sc_name, error);
		}
		if (g_mirror_disconnect_on_failure &&
		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
		}
	}
	return (error);
}
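
/*
 * Note (illustrative): the on-disk metadata record occupies the last sector
 * of the component, so with e.g. a 512-byte sector size it is written at
 * offset mediasize - 512.  This is why g_write_data() above is called with
 * length = sectorsize and offset = mediasize - length.
 */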

static int
g_mirror_clear_metadata(struct g_mirror_disk *disk)
{
	int error;

	g_topology_assert_not();
	sx_assert(&disk->d_softc->sc_lock, SX_LOCKED);

	error = g_mirror_write_metadata(disk, NULL);
	if (error == 0) {
		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
		    g_mirror_get_diskname(disk));
	} else {
		G_MIRROR_DEBUG(0,
		    "Cannot clear metadata on disk %s (error=%d).",
		    g_mirror_get_diskname(disk), error);
	}
	return (error);
}

void
g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
    struct g_mirror_metadata *md)
{

	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
	md->md_version = G_MIRROR_VERSION;
	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
	md->md_mid = sc->sc_id;
	md->md_all = sc->sc_ndisks;
	md->md_slice = sc->sc_slice;
	md->md_balance = sc->sc_balance;
	md->md_genid = sc->sc_genid;
	md->md_mediasize = sc->sc_mediasize;
	md->md_sectorsize = sc->sc_sectorsize;
	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
	bzero(md->md_provider, sizeof(md->md_provider));
	if (disk == NULL) {
		md->md_did = arc4random();
		md->md_priority = 0;
		md->md_syncid = 0;
		md->md_dflags = 0;
		md->md_sync_offset = 0;
		md->md_provsize = 0;
	} else {
		md->md_did = disk->d_id;
		md->md_priority = disk->d_priority;
		md->md_syncid = disk->d_sync.ds_syncid;
		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
			md->md_sync_offset = disk->d_sync.ds_offset_done;
		else
			md->md_sync_offset = 0;
		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
			strlcpy(md->md_provider,
			    disk->d_consumer->provider->name,
			    sizeof(md->md_provider));
		}
		md->md_provsize = disk->d_consumer->provider->mediasize;
	}
}

void
g_mirror_update_metadata(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_mirror_metadata md;
	int error;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0)
		g_mirror_fill_metadata(sc, disk, &md);
	error = g_mirror_write_metadata(disk, &md);
	if (error == 0) {
		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
		    g_mirror_get_diskname(disk));
	} else {
		G_MIRROR_DEBUG(0,
		    "Cannot update metadata on disk %s (error=%d).",
		    g_mirror_get_diskname(disk), error);
	}
}

static void
g_mirror_bump_syncid(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
	    ("%s called with no active disks (device=%s).", __func__,
	    sc->sc_name));

	sc->sc_syncid++;
	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
	    sc->sc_syncid);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_sync.ds_syncid = sc->sc_syncid;
			g_mirror_update_metadata(disk);
		}
	}
}

static void
g_mirror_bump_genid(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
	    ("%s called with no active disks (device=%s).", __func__,
	    sc->sc_name));

	sc->sc_genid++;
	G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name,
	    sc->sc_genid);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_genid = sc->sc_genid;
			g_mirror_update_metadata(disk);
		}
	}
}

static int
g_mirror_idle(struct g_mirror_softc *sc, int acw)
{
	struct g_mirror_disk *disk;
	int timeout;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if (sc->sc_provider == NULL)
		return (0);
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
		return (0);
	if (sc->sc_idle)
		return (0);
	if (sc->sc_writes > 0)
		return (0);
	if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) {
		timeout = g_mirror_idletime - (time_uptime - sc->sc_last_write);
		if (!g_mirror_shutdown && timeout > 0)
			return (timeout);
	}
	sc->sc_idle = 1;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
		    g_mirror_get_diskname(disk), sc->sc_name);
		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
	}
	return (0);
}
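
/*
 * Worked example (illustrative): with the default g_mirror_idletime of 5
 * seconds, if the last write happened 2 seconds ago and the provider is
 * still open for writing, g_mirror_idle() returns 3 and the worker thread
 * retries later; once the full idle period has elapsed, the active
 * components are marked clean on disk.
 */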

static void
g_mirror_unidle(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
		return;
	sc->sc_idle = 0;
	sc->sc_last_write = time_uptime;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
		    g_mirror_get_diskname(disk), sc->sc_name);
		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
	}
}

static void
g_mirror_flush_done(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct bio *pbp;

	pbp = bp->bio_parent;
	sc = pbp->bio_to->private;
	mtx_lock(&sc->sc_done_mtx);
	if (pbp->bio_error == 0)
		pbp->bio_error = bp->bio_error;
	pbp->bio_completed += bp->bio_completed;
	pbp->bio_inbed++;
	if (pbp->bio_children == pbp->bio_inbed) {
		mtx_unlock(&sc->sc_done_mtx);
		g_io_deliver(pbp, pbp->bio_error);
	} else
		mtx_unlock(&sc->sc_done_mtx);
	g_destroy_bio(bp);
}

static void
g_mirror_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_from->geom->softc;
	bp->bio_cflags = G_MIRROR_BIO_FLAG_REGULAR;
	mtx_lock(&sc->sc_queue_mtx);
	bioq_insert_tail(&sc->sc_queue, bp);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);
}

static void
g_mirror_regular_request(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct bio *pbp;

	g_topology_assert_not();

	pbp = bp->bio_parent;
	sc = pbp->bio_to->private;
	bp->bio_from->index--;
	if (bp->bio_cmd == BIO_WRITE)
		sc->sc_writes--;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
	}

	pbp->bio_inbed++;
	KASSERT(pbp->bio_inbed <= pbp->bio_children,
	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
	    pbp->bio_children));
	if (bp->bio_error == 0 && pbp->bio_error == 0) {
		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
		g_destroy_bio(bp);
		if (pbp->bio_children == pbp->bio_inbed) {
			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
			pbp->bio_completed = pbp->bio_length;
			if (pbp->bio_cmd == BIO_WRITE ||
			    pbp->bio_cmd == BIO_DELETE) {
				bioq_remove(&sc->sc_inflight, pbp);
				/* Release delayed sync requests if possible. */
				g_mirror_sync_release(sc);
			}
			g_io_deliver(pbp, pbp->bio_error);
		}
		return;
	} else if (bp->bio_error != 0) {
		if (pbp->bio_error == 0)
			pbp->bio_error = bp->bio_error;
		if (disk != NULL) {
			if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
				disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
				G_MIRROR_LOGREQ(0, bp,
				    "Request failed (error=%d).",
				    bp->bio_error);
			} else {
				G_MIRROR_LOGREQ(1, bp,
				    "Request failed (error=%d).",
				    bp->bio_error);
			}
			if (g_mirror_disconnect_on_failure &&
			    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1)
			{
				sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
				g_mirror_event_send(disk,
				    G_MIRROR_DISK_STATE_DISCONNECTED,
				    G_MIRROR_EVENT_DONTWAIT);
			}
		}
		switch (pbp->bio_cmd) {
		case BIO_DELETE:
		case BIO_WRITE:
			pbp->bio_inbed--;
			pbp->bio_children--;
			break;
		}
	}
	g_destroy_bio(bp);

	switch (pbp->bio_cmd) {
	case BIO_READ:
		if (pbp->bio_inbed < pbp->bio_children)
			break;
		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 1)
			g_io_deliver(pbp, pbp->bio_error);
		else {
			pbp->bio_error = 0;
			mtx_lock(&sc->sc_queue_mtx);
			bioq_insert_tail(&sc->sc_queue, pbp);
			mtx_unlock(&sc->sc_queue_mtx);
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
			wakeup(sc);
		}
		break;
	case BIO_DELETE:
	case BIO_WRITE:
		if (pbp->bio_children == 0) {
			/*
			 * All requests failed.
			 */
		} else if (pbp->bio_inbed < pbp->bio_children) {
			/* Do nothing. */
			break;
		} else if (pbp->bio_children == pbp->bio_inbed) {
			/* Some requests succeeded. */
			pbp->bio_error = 0;
			pbp->bio_completed = pbp->bio_length;
		}
		bioq_remove(&sc->sc_inflight, pbp);
		/* Release delayed sync requests if possible. */
		g_mirror_sync_release(sc);
		g_io_deliver(pbp, pbp->bio_error);
		break;
	default:
		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
		break;
	}
}

static void
g_mirror_sync_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
	sc = bp->bio_from->geom->softc;
	bp->bio_cflags = G_MIRROR_BIO_FLAG_SYNC;
	mtx_lock(&sc->sc_queue_mtx);
	bioq_insert_tail(&sc->sc_queue, bp);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);
}

static void
g_mirror_candelete(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	int *val;

	sc = bp->bio_to->private;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE)
			break;
	}
	val = (int *)bp->bio_data;
	*val = (disk != NULL);
	g_io_deliver(bp, 0);
}

static void
g_mirror_kernel_dump(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct bio *cbp;
	struct g_kerneldump *gkd;

	/*
	 * We configure dumping to the first component, because this component
	 * will be used for reading with the 'prefer' balance algorithm.
	 * If the component with the highest priority is currently
	 * disconnected, we will not be able to read the dump after a reboot
	 * even if that component is connected and synchronized later.
	 * Can we do something better?
	 */
	sc = bp->bio_to->private;
	disk = LIST_FIRST(&sc->sc_disks);

	gkd = (struct g_kerneldump *)bp->bio_data;
	if (gkd->length > bp->bio_to->mediasize)
		gkd->length = bp->bio_to->mediasize;
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		g_io_deliver(bp, ENOMEM);
		return;
	}
	cbp->bio_done = g_std_done;
	g_io_request(cbp, disk->d_consumer);
	G_MIRROR_DEBUG(1, "Kernel dump will go to %s.",
	    g_mirror_get_diskname(disk));
}

static void
g_mirror_flush(struct g_mirror_softc *sc, struct bio *bp)
{
	struct bio_queue_head queue;
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	bioq_init(&queue);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		cbp = g_clone_bio(bp);
		if (cbp == NULL) {
			while ((cbp = bioq_takefirst(&queue)) != NULL)
				g_destroy_bio(cbp);
			if (bp->bio_error == 0)
				bp->bio_error = ENOMEM;
			g_io_deliver(bp, bp->bio_error);
			return;
		}
		bioq_insert_tail(&queue, cbp);
		cbp->bio_done = g_mirror_flush_done;
		cbp->bio_caller1 = disk;
		cbp->bio_to = disk->d_consumer->provider;
	}
	while ((cbp = bioq_takefirst(&queue)) != NULL) {
		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
		disk = cbp->bio_caller1;
		cbp->bio_caller1 = NULL;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		g_io_request(cbp, disk->d_consumer);
	}
}

static void
g_mirror_start(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_to->private;
	/*
	 * If sc == NULL or there are no valid disks, the provider's error
	 * should be set and g_mirror_start() should not be called at all.
	 */
	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
	    ("Provider's error should be set (error=%d)(mirror=%s).",
	    bp->bio_to->error, bp->bio_to->name));
	G_MIRROR_LOGREQ(3, bp, "Request received.");

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		break;
	case BIO_FLUSH:
		g_mirror_flush(sc, bp);
		return;
	case BIO_GETATTR:
		if (!strcmp(bp->bio_attribute, "GEOM::candelete")) {
			g_mirror_candelete(bp);
			return;
		} else if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) {
			g_mirror_kernel_dump(bp);
			return;
		}
		/* FALLTHROUGH */
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		return;
	}
	mtx_lock(&sc->sc_queue_mtx);
	bioq_insert_tail(&sc->sc_queue, bp);
	mtx_unlock(&sc->sc_queue_mtx);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	wakeup(sc);
}

/*
 * Return TRUE if the given request collides with an in-progress
 * synchronization request.
 */
static int
g_mirror_sync_collision(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct bio *sbp;
	off_t rstart, rend, sstart, send;
	u_int i;

	if (sc->sc_sync.ds_ndisks == 0)
		return (0);
	rstart = bp->bio_offset;
	rend = bp->bio_offset + bp->bio_length;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING)
			continue;
		for (i = 0; i < g_mirror_syncreqs; i++) {
			sbp = disk->d_sync.ds_bios[i];
			if (sbp == NULL)
				continue;
			sstart = sbp->bio_offset;
			send = sbp->bio_offset + sbp->bio_length;
			if (rend > sstart && rstart < send)
				return (1);
		}
	}
	return (0);
}
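
/*
 * Illustration of the overlap test above: the ranges are half-open,
 * [rstart, rend) and [sstart, send), so e.g. a regular request covering
 * [0, 128k) collides with a sync request at [64k, 192k) because
 * 128k > 64k and 0 < 192k, while adjacent ranges such as [0, 64k) and
 * [64k, 128k) do not collide.
 */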

/*
 * Return TRUE if the given sync request collides with an in-progress
 * regular request.
 */
static int
g_mirror_regular_collision(struct g_mirror_softc *sc, struct bio *sbp)
{
	off_t rstart, rend, sstart, send;
	struct bio *bp;

	if (sc->sc_sync.ds_ndisks == 0)
		return (0);
	sstart = sbp->bio_offset;
	send = sbp->bio_offset + sbp->bio_length;
	TAILQ_FOREACH(bp, &sc->sc_inflight.queue, bio_queue) {
		rstart = bp->bio_offset;
		rend = bp->bio_offset + bp->bio_length;
		if (rend > sstart && rstart < send)
			return (1);
	}
	return (0);
}

/*
 * Put the request onto the delayed queue.
 */
static void
g_mirror_regular_delay(struct g_mirror_softc *sc, struct bio *bp)
{

	G_MIRROR_LOGREQ(2, bp, "Delaying request.");
	bioq_insert_head(&sc->sc_regular_delayed, bp);
}

/*
 * Put the synchronization request onto the delayed queue.
 */
static void
g_mirror_sync_delay(struct g_mirror_softc *sc, struct bio *bp)
{

	G_MIRROR_LOGREQ(2, bp, "Delaying synchronization request.");
	bioq_insert_tail(&sc->sc_sync_delayed, bp);
}

/*
 * Release delayed regular requests that no longer collide with
 * synchronization requests.
 */
static void
g_mirror_regular_release(struct g_mirror_softc *sc)
{
	struct bio *bp, *bp2;

	TAILQ_FOREACH_SAFE(bp, &sc->sc_regular_delayed.queue, bio_queue, bp2) {
		if (g_mirror_sync_collision(sc, bp))
			continue;
		bioq_remove(&sc->sc_regular_delayed, bp);
		G_MIRROR_LOGREQ(2, bp, "Releasing delayed request (%p).", bp);
		mtx_lock(&sc->sc_queue_mtx);
		bioq_insert_head(&sc->sc_queue, bp);
#if 0
		/*
		 * wakeup() is not needed, because this function is called from
		 * the worker thread.
		 */
		wakeup(&sc->sc_queue);
#endif
		mtx_unlock(&sc->sc_queue_mtx);
	}
}

/*
 * Release delayed synchronization requests that no longer collide with
 * regular requests.
 */
static void
g_mirror_sync_release(struct g_mirror_softc *sc)
{
	struct bio *bp, *bp2;

	TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed.queue, bio_queue, bp2) {
		if (g_mirror_regular_collision(sc, bp))
			continue;
		bioq_remove(&sc->sc_sync_delayed, bp);
		G_MIRROR_LOGREQ(2, bp,
		    "Releasing delayed synchronization request.");
		g_io_request(bp, bp->bio_from);
	}
}

/*
 * Handle synchronization requests.
 * Every synchronization request is a two-step process: first, a READ request
 * is sent to the active provider, and then a WRITE request (carrying the data
 * just read) is sent to the provider being synchronized.  When the WRITE is
 * finished, a new synchronization request is sent.
 */
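/*
 * Sketch of one request's life cycle (illustrative): the worker picks up a
 * completed sync bio, and in the BIO_READ case below rewrites it as a
 * BIO_WRITE aimed at the synchronizing disk's consumer; when that write
 * completes, the BIO_WRITE case reuses the same bio (and its data buffer)
 * for the next BIO_READ at sync->ds_offset, until the offset reaches
 * sc_mediasize and the disk can be marked ACTIVE.
 */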
static void
g_mirror_sync_request(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;

	bp->bio_from->index--;
	sc = bp->bio_from->geom->softc;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
		free(bp->bio_data, M_MIRROR);
		g_destroy_bio(bp);
		sx_xlock(&sc->sc_lock);
		return;
	}

	/*
	 * Synchronization request.
	 */
	switch (bp->bio_cmd) {
	case BIO_READ:
	    {
		struct g_consumer *cp;

		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_destroy_bio(bp);
			return;
		}
		G_MIRROR_LOGREQ(3, bp,
		    "Synchronization request half-finished.");
		bp->bio_cmd = BIO_WRITE;
		bp->bio_cflags = 0;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		cp->index++;
		g_io_request(bp, cp);
		return;
	    }
	case BIO_WRITE:
	    {
		struct g_mirror_disk_sync *sync;
		off_t offset;
		void *data;
		int i;

		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_destroy_bio(bp);
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}
		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
		sync = &disk->d_sync;
		if (sync->ds_offset >= sc->sc_mediasize ||
		    sync->ds_consumer == NULL ||
		    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
			/* Don't send more synchronization requests. */
			sync->ds_inflight--;
			if (sync->ds_bios != NULL) {
				i = (int)(uintptr_t)bp->bio_caller1;
				sync->ds_bios[i] = NULL;
			}
			free(bp->bio_data, M_MIRROR);
			g_destroy_bio(bp);
			if (sync->ds_inflight > 0)
				return;
			if (sync->ds_consumer == NULL ||
			    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
				return;
			}
			/* Disk up-to-date, activate it. */
			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}

		/* Send next synchronization request. */
		data = bp->bio_data;
		g_reset_bio(bp);
		bp->bio_cmd = BIO_READ;
		bp->bio_offset = sync->ds_offset;
		bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
		sync->ds_offset += bp->bio_length;
		bp->bio_done = g_mirror_sync_done;
		bp->bio_data = data;
		bp->bio_from = sync->ds_consumer;
		bp->bio_to = sc->sc_provider;
		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
		sync->ds_consumer->index++;
		/*
		 * Delay the request if it is colliding with a regular request.
		 */
		if (g_mirror_regular_collision(sc, bp))
			g_mirror_sync_delay(sc, bp);
		else
			g_io_request(bp, sync->ds_consumer);

		/* Release delayed requests if possible. */
		g_mirror_regular_release(sc);

		/* Find the smallest offset. */
		offset = sc->sc_mediasize;
		for (i = 0; i < g_mirror_syncreqs; i++) {
			bp = sync->ds_bios[i];
			if (bp->bio_offset < offset)
				offset = bp->bio_offset;
		}
		if (sync->ds_offset_done + (MAXPHYS * 100) < offset) {
			/* Update offset_done once per 100 MAXPHYS-sized blocks. */
			sync->ds_offset_done = offset;
			g_mirror_update_metadata(disk);
		}
		return;
	    }
	default:
		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
		    bp->bio_cmd, sc->sc_name));
		break;
	}
}

static void
g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
			break;
	}
	if (disk == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENXIO;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	g_io_request(cbp, cp);
}

static void
g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	disk = g_mirror_get_disk(sc);
	if (disk == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENXIO;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	g_io_request(cbp, cp);
}

#define TRACK_SIZE  (1 * 1024 * 1024)
#define LOAD_SCALE	256
#define ABS(x)		(((x) >= 0) ? (x) : (-(x)))

static void
g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk, *dp;
	struct g_consumer *cp;
	struct bio *cbp;
	int prio, best;

	/* Find a disk with the smallest load. */
	disk = NULL;
	best = INT_MAX;
	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		prio = dp->load;
		/* If disk head is precisely in position - highly prefer it. */
		if (dp->d_last_offset == bp->bio_offset)
			prio -= 2 * LOAD_SCALE;
		else
		/* If disk head is close to position - prefer it. */
		if (ABS(dp->d_last_offset - bp->bio_offset) < TRACK_SIZE)
			prio -= 1 * LOAD_SCALE;
		if (prio <= best) {
			disk = dp;
			best = prio;
		}
	}
	KASSERT(disk != NULL, ("NULL disk for %s.", sc->sc_name));
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	/* Remember last head position. */
	disk->d_last_offset = bp->bio_offset + bp->bio_length;
	/* Update loads. */
	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
		dp->load = (dp->d_consumer->index * LOAD_SCALE +
		    dp->load * 7) / 8;
	}
	g_io_request(cbp, cp);
}
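
/*
 * Worked example of the load computation above (illustrative): with
 * LOAD_SCALE 256, a disk whose smoothed load is 256 but whose head sits
 * exactly at bio_offset gets prio 256 - 2 * 256 = -256, so it wins over an
 * idle disk (load 0, prio 0) whose head is far away.  The per-disk load is
 * an exponential moving average of outstanding requests:
 * load = (index * 256 + load * 7) / 8.
 */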

static void
g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
{
	struct bio_queue_head queue;
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;
	off_t left, mod, offset, slice;
	u_char *data;
	u_int ndisks;

	if (bp->bio_length <= sc->sc_slice) {
		g_mirror_request_round_robin(sc, bp);
		return;
	}
	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
	slice = bp->bio_length / ndisks;
	mod = slice % sc->sc_provider->sectorsize;
	if (mod != 0)
		slice += sc->sc_provider->sectorsize - mod;
	/*
	 * Allocate all bios before sending any request, so we can
	 * return ENOMEM in a nice and clean way.
	 */
	left = bp->bio_length;
	offset = bp->bio_offset;
	data = bp->bio_data;
	bioq_init(&queue);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		cbp = g_clone_bio(bp);
		if (cbp == NULL) {
			while ((cbp = bioq_takefirst(&queue)) != NULL)
				g_destroy_bio(cbp);
			if (bp->bio_error == 0)
				bp->bio_error = ENOMEM;
			g_io_deliver(bp, bp->bio_error);
			return;
		}
		bioq_insert_tail(&queue, cbp);
		cbp->bio_done = g_mirror_done;
		cbp->bio_caller1 = disk;
		cbp->bio_to = disk->d_consumer->provider;
		cbp->bio_offset = offset;
		cbp->bio_data = data;
		cbp->bio_length = MIN(left, slice);
		left -= cbp->bio_length;
		if (left == 0)
			break;
		offset += cbp->bio_length;
		data += cbp->bio_length;
	}
	while ((cbp = bioq_takefirst(&queue)) != NULL) {
		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
		disk = cbp->bio_caller1;
		cbp->bio_caller1 = NULL;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		disk->d_consumer->index++;
		g_io_request(cbp, disk->d_consumer);
	}
}
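
/*
 * Worked example of the slice rounding above (illustrative): a 100 KB read
 * on a two-disk mirror with 512-byte sectors gives slice = 51200, which is
 * already sector-aligned; on a three-disk mirror it gives slice = 34133,
 * mod = 341, so slice is rounded up to 34304 bytes and the last component
 * receives the (smaller) remainder.
 */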

static void
g_mirror_register_request(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_to->private;
	switch (bp->bio_cmd) {
	case BIO_READ:
		switch (sc->sc_balance) {
		case G_MIRROR_BALANCE_LOAD:
			g_mirror_request_load(sc, bp);
			break;
		case G_MIRROR_BALANCE_PREFER:
			g_mirror_request_prefer(sc, bp);
			break;
		case G_MIRROR_BALANCE_ROUND_ROBIN:
			g_mirror_request_round_robin(sc, bp);
			break;
		case G_MIRROR_BALANCE_SPLIT:
			g_mirror_request_split(sc, bp);
			break;
		}
		return;
	case BIO_WRITE:
	case BIO_DELETE:
	    {
		struct g_mirror_disk *disk;
		struct g_mirror_disk_sync *sync;
		struct bio_queue_head queue;
		struct g_consumer *cp;
		struct bio *cbp;

		/*
		 * Delay the request if it is colliding with a synchronization
		 * request.
		 */
		if (g_mirror_sync_collision(sc, bp)) {
			g_mirror_regular_delay(sc, bp);
			return;
		}

		if (sc->sc_idle)
			g_mirror_unidle(sc);
		else
			sc->sc_last_write = time_uptime;

		/*
		 * Allocate all bios before sending any request, so we can
		 * return ENOMEM in a nice and clean way.
		 */
		bioq_init(&queue);
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			sync = &disk->d_sync;
			switch (disk->d_state) {
			case G_MIRROR_DISK_STATE_ACTIVE:
				break;
			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
				if (bp->bio_offset >= sync->ds_offset)
					continue;
				break;
			default:
				continue;
			}
			if (bp->bio_cmd == BIO_DELETE &&
			    (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE) == 0)
				continue;
			cbp = g_clone_bio(bp);
			if (cbp == NULL) {
				while ((cbp = bioq_takefirst(&queue)) != NULL)
					g_destroy_bio(cbp);
				if (bp->bio_error == 0)
					bp->bio_error = ENOMEM;
				g_io_deliver(bp, bp->bio_error);
				return;
			}
			bioq_insert_tail(&queue, cbp);
			cbp->bio_done = g_mirror_done;
			cp = disk->d_consumer;
			cbp->bio_caller1 = cp;
			cbp->bio_to = cp->provider;
			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
			    ("Consumer %s not opened (r%dw%de%d).",
			    cp->provider->name, cp->acr, cp->acw, cp->ace));
		}
		if (bioq_first(&queue) == NULL) {
			g_io_deliver(bp, EOPNOTSUPP);
			return;
		}
		while ((cbp = bioq_takefirst(&queue)) != NULL) {
			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
			cp = cbp->bio_caller1;
			cbp->bio_caller1 = NULL;
			cp->index++;
			sc->sc_writes++;
			g_io_request(cbp, cp);
		}
		/*
		 * Put the request onto the inflight queue, so we can check
		 * that new synchronization requests don't collide with it.
		 */
		bioq_insert_tail(&sc->sc_inflight, bp);
		/*
		 * Bump syncid on first write.
		 */
		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) {
			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
			g_mirror_bump_syncid(sc);
		}
		return;
	    }
	default:
		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
		    bp->bio_cmd, sc->sc_name));
		break;
	}
}

static int
g_mirror_can_destroy(struct g_mirror_softc *sc)
{
	struct g_geom *gp;
	struct g_consumer *cp;

	g_topology_assert();
	gp = sc->sc_geom;
	if (gp->softc == NULL)
		return (1);
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_TASTING) != 0)
		return (0);
	LIST_FOREACH(cp, &gp->consumer, consumer) {
		if (g_mirror_is_busy(sc, cp))
			return (0);
	}
	gp = sc->sc_sync.ds_geom;
	LIST_FOREACH(cp, &gp->consumer, consumer) {
		if (g_mirror_is_busy(sc, cp))
			return (0);
	}
	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
	    sc->sc_name);
	return (1);
}

static int
g_mirror_try_destroy(struct g_mirror_softc *sc)
{

	if (sc->sc_rootmount != NULL) {
		G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
		    sc->sc_rootmount);
		root_mount_rel(sc->sc_rootmount);
		sc->sc_rootmount = NULL;
	}
	g_topology_lock();
	if (!g_mirror_can_destroy(sc)) {
		g_topology_unlock();
		return (0);
	}
	sc->sc_geom->softc = NULL;
	sc->sc_sync.ds_geom->softc = NULL;
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WAIT) != 0) {
		g_topology_unlock();
		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
		    &sc->sc_worker);
		/* Unlock sc_lock here, as it can be destroyed after wakeup. */
		sx_xunlock(&sc->sc_lock);
		wakeup(&sc->sc_worker);
		sc->sc_worker = NULL;
	} else {
		g_topology_unlock();
		g_mirror_destroy_device(sc);
	}
	return (1);
}

/*
 * Worker thread.
 */
static void
g_mirror_worker(void *arg)
{
	struct g_mirror_softc *sc;
	struct g_mirror_event *ep;
	struct bio *bp;
	int timeout;

	sc = arg;
	thread_lock(curthread);
	sched_prio(curthread, PRIBIO);
	thread_unlock(curthread);

	sx_xlock(&sc->sc_lock);
	for (;;) {
		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
		/*
		 * First take a look at events.
		 * It is important to handle events before any I/O requests.
		 */
		ep = g_mirror_event_get(sc);
		if (ep != NULL) {
			g_mirror_event_remove(sc, ep);
			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
				/* Update only device status. */
				G_MIRROR_DEBUG(3,
				    "Running event for device %s.",
				    sc->sc_name);
				ep->e_error = 0;
				g_mirror_update_device(sc, true);
			} else {
				/* Update disk status. */
				G_MIRROR_DEBUG(3, "Running event for disk %s.",
				     g_mirror_get_diskname(ep->e_disk));
				ep->e_error = g_mirror_update_disk(ep->e_disk,
				    ep->e_state);
				if (ep->e_error == 0)
					g_mirror_update_device(sc, false);
			}
			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
				KASSERT(ep->e_error == 0,
				    ("Error cannot be handled."));
				g_mirror_event_free(ep);
			} else {
				ep->e_flags |= G_MIRROR_EVENT_DONE;
				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
				    ep);
				mtx_lock(&sc->sc_events_mtx);
				wakeup(ep);
				mtx_unlock(&sc->sc_events_mtx);
			}
			if ((sc->sc_flags &
			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
				if (g_mirror_try_destroy(sc)) {
					curthread->td_pflags &= ~TDP_GEOM;
					G_MIRROR_DEBUG(1, "Thread exiting.");
					kproc_exit(0);
				}
			}
			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
			continue;
		}
		/*
		 * Check whether we can mark the array as CLEAN and, if we
		 * cannot, how many seconds we should wait before retrying.
		 */
		timeout = g_mirror_idle(sc, -1);
		/*
		 * Now I/O requests.
		 */
		/* Get first request from the queue. */
		mtx_lock(&sc->sc_queue_mtx);
		bp = bioq_takefirst(&sc->sc_queue);
		if (bp == NULL) {
			if ((sc->sc_flags &
			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
				mtx_unlock(&sc->sc_queue_mtx);
				if (g_mirror_try_destroy(sc)) {
					curthread->td_pflags &= ~TDP_GEOM;
					G_MIRROR_DEBUG(1, "Thread exiting.");
					kproc_exit(0);
				}
				mtx_lock(&sc->sc_queue_mtx);
			}
			sx_xunlock(&sc->sc_lock);
			/*
			 * XXX: We can miss an event here, because an event
			 *      can be added without the sx-device-lock and
			 *      without the mtx-queue-lock. Maybe we should
			 *      just stop using a dedicated mutex for event
			 *      synchronization and stick with the queue lock?
			 *      The event will hang here until the next I/O
			 *      request or the next event arrives.
			 */
1908			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1",
1909			    timeout * hz);
1910			sx_xlock(&sc->sc_lock);
1911			G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
1912			continue;
1913		}
1914		mtx_unlock(&sc->sc_queue_mtx);
1915
1916		if (bp->bio_from->geom == sc->sc_sync.ds_geom &&
1917		    (bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
1918			g_mirror_sync_request(bp);	/* READ */
1919		} else if (bp->bio_to != sc->sc_provider) {
1920			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0)
1921				g_mirror_regular_request(bp);
1922			else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
1923				g_mirror_sync_request(bp);	/* WRITE */
1924			else {
1925				KASSERT(0,
1926				    ("Invalid request cflags=0x%hx to=%s.",
1927				    bp->bio_cflags, bp->bio_to->name));
1928			}
1929		} else {
1930			g_mirror_register_request(bp);
1931		}
1932		G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__);
1933	}
1934}
1935
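/*
 * Keep a component's DIRTY flag in sync with the device idle state:
 * mark the disk dirty as soon as the device is no longer idle, and
 * clean again once it has idled.  Dirty components are the ones that
 * have to be considered for resynchronization after an unclean
 * shutdown.
 */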
1936static void
1937g_mirror_update_idle(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
1938{
1939
1940	sx_assert(&sc->sc_lock, SX_LOCKED);
1941
1942	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
1943		return;
1944	if (!sc->sc_idle && (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
1945		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
1946		    g_mirror_get_diskname(disk), sc->sc_name);
1947		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1948	} else if (sc->sc_idle &&
1949	    (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
1950		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
1951		    g_mirror_get_diskname(disk), sc->sc_name);
1952		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1953	}
1954}
1955
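/*
 * Start synchronization of a component: attach a dedicated consumer
 * to the mirror provider and fire off g_mirror_syncreqs parallel
 * BIO_READ requests of up to MAXPHYS bytes each.  The completion
 * path (g_mirror_sync_done() and g_mirror_sync_request()) turns each
 * read into a write to the component being rebuilt and issues the
 * next read.
 */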
1956static void
1957g_mirror_sync_start(struct g_mirror_disk *disk)
1958{
1959	struct g_mirror_softc *sc;
1960	struct g_consumer *cp;
1961	struct bio *bp;
1962	int error, i;
1963
1964	g_topology_assert_not();
1965	sc = disk->d_softc;
1966	sx_assert(&sc->sc_lock, SX_LOCKED);
1967
1968	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
1969	    ("Disk %s is not marked for synchronization.",
1970	    g_mirror_get_diskname(disk)));
1971	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1972	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
1973	    sc->sc_state));
1974
1975	sx_xunlock(&sc->sc_lock);
1976	g_topology_lock();
1977	cp = g_new_consumer(sc->sc_sync.ds_geom);
1978	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
1979	error = g_attach(cp, sc->sc_provider);
1980	KASSERT(error == 0,
1981	    ("Cannot attach to %s (error=%d).", sc->sc_name, error));
1982	error = g_access(cp, 1, 0, 0);
1983	KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error));
1984	g_topology_unlock();
1985	sx_xlock(&sc->sc_lock);
1986
1987	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
1988	    g_mirror_get_diskname(disk));
1989	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) == 0)
1990		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1991	KASSERT(disk->d_sync.ds_consumer == NULL,
1992	    ("Sync consumer already exists (device=%s, disk=%s).",
1993	    sc->sc_name, g_mirror_get_diskname(disk)));
1994
1995	disk->d_sync.ds_consumer = cp;
1996	disk->d_sync.ds_consumer->private = disk;
1997	disk->d_sync.ds_consumer->index = 0;
1998
1999	/*
2000	 * Allocate memory for synchronization bios and initialize them.
2001	 */
2002	disk->d_sync.ds_bios = malloc(sizeof(struct bio *) * g_mirror_syncreqs,
2003	    M_MIRROR, M_WAITOK);
2004	for (i = 0; i < g_mirror_syncreqs; i++) {
2005		bp = g_alloc_bio();
2006		disk->d_sync.ds_bios[i] = bp;
2007		bp->bio_parent = NULL;
2008		bp->bio_cmd = BIO_READ;
2009		bp->bio_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK);
2010		bp->bio_cflags = 0;
2011		bp->bio_offset = disk->d_sync.ds_offset;
2012		bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
2013		disk->d_sync.ds_offset += bp->bio_length;
2014		bp->bio_done = g_mirror_sync_done;
2015		bp->bio_from = disk->d_sync.ds_consumer;
2016		bp->bio_to = sc->sc_provider;
2017		bp->bio_caller1 = (void *)(uintptr_t)i;
2018	}
2019
2020	/* Increase the number of disks in SYNCHRONIZING state. */
2021	sc->sc_sync.ds_ndisks++;
2022	/* Set the number of in-flight synchronization requests. */
2023	disk->d_sync.ds_inflight = g_mirror_syncreqs;
2024
2025	/*
2026	 * Fire off first synchronization requests.
2027	 */
2028	for (i = 0; i < g_mirror_syncreqs; i++) {
2029		bp = disk->d_sync.ds_bios[i];
2030		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
2031		disk->d_sync.ds_consumer->index++;
2032		/*
2033		 * Delay the request if it is colliding with a regular request.
2034		 */
2035		if (g_mirror_regular_collision(sc, bp))
2036			g_mirror_sync_delay(sc, bp);
2037		else
2038			g_io_request(bp, disk->d_sync.ds_consumer);
2039	}
2040}
2041
2042/*
2043 * Stop synchronization process.
2044 * type: 0 - synchronization finished
2045 *       1 - synchronization stopped
2046 */
2047static void
2048g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
2049{
2050	struct g_mirror_softc *sc;
2051	struct g_consumer *cp;
2052
2053	g_topology_assert_not();
2054	sc = disk->d_softc;
2055	sx_assert(&sc->sc_lock, SX_LOCKED);
2056
2057	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2058	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2059	    g_mirror_disk_state2str(disk->d_state)));
2060	if (disk->d_sync.ds_consumer == NULL)
2061		return;
2062
2063	if (type == 0) {
2064		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
2065		    sc->sc_name, g_mirror_get_diskname(disk));
2066	} else /* if (type == 1) */ {
2067		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
2068		    sc->sc_name, g_mirror_get_diskname(disk));
2069	}
2070	free(disk->d_sync.ds_bios, M_MIRROR);
2071	disk->d_sync.ds_bios = NULL;
2072	cp = disk->d_sync.ds_consumer;
2073	disk->d_sync.ds_consumer = NULL;
2074	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2075	sc->sc_sync.ds_ndisks--;
2076	sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
2077	g_topology_lock();
2078	g_mirror_kill_consumer(sc, cp);
2079	g_topology_unlock();
2080	sx_xlock(&sc->sc_lock);
2081}
2082
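/*
 * Create and announce the mirror/<name> provider.  The provider
 * inherits the largest stripe size found among the components and
 * accepts unmapped BIOs only if every component underneath does
 * (and the balance algorithm is not "split").
 */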
2083static void
2084g_mirror_launch_provider(struct g_mirror_softc *sc)
2085{
2086	struct g_mirror_disk *disk;
2087	struct g_provider *pp, *dp;
2088
2089	sx_assert(&sc->sc_lock, SX_LOCKED);
2090
2091	g_topology_lock();
2092	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
2093	pp->flags |= G_PF_DIRECT_RECEIVE;
2094	pp->mediasize = sc->sc_mediasize;
2095	pp->sectorsize = sc->sc_sectorsize;
2096	pp->stripesize = 0;
2097	pp->stripeoffset = 0;
2098
2099	/* Splitting of unmapped BIOs could work but isn't implemented yet. */
2100	if (sc->sc_balance != G_MIRROR_BALANCE_SPLIT)
2101		pp->flags |= G_PF_ACCEPT_UNMAPPED;
2102
2103	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2104		if (disk->d_consumer && disk->d_consumer->provider) {
2105			dp = disk->d_consumer->provider;
2106			if (dp->stripesize > pp->stripesize) {
2107				pp->stripesize = dp->stripesize;
2108				pp->stripeoffset = dp->stripeoffset;
2109			}
2110			/* A provider underneath us doesn't support unmapped I/O. */
2111			if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) {
2112				G_MIRROR_DEBUG(0, "Cancelling unmapped "
2113				    "because of %s.", dp->name);
2114				pp->flags &= ~G_PF_ACCEPT_UNMAPPED;
2115			}
2116		}
2117	}
2118	pp->private = sc;
2119	sc->sc_refcnt++;
2120	sc->sc_provider = pp;
2121	g_error_provider(pp, 0);
2122	g_topology_unlock();
2123	G_MIRROR_DEBUG(0, "Device %s launched (%u/%u).", pp->name,
2124	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE), sc->sc_ndisks);
2125	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2126		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2127			g_mirror_sync_start(disk);
2128	}
2129}
2130
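/*
 * Tear the provider down: mark it with ENXIO, drain the request
 * queue (failing external requests, freeing our own), and stop any
 * synchronization that is still in progress.
 */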
2131static void
2132g_mirror_destroy_provider(struct g_mirror_softc *sc)
2133{
2134	struct g_mirror_disk *disk;
2135	struct bio *bp;
2136
2137	g_topology_assert_not();
2138	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
2139	    sc->sc_name));
2140
2141	g_topology_lock();
2142	g_error_provider(sc->sc_provider, ENXIO);
2143	mtx_lock(&sc->sc_queue_mtx);
2144	while ((bp = bioq_takefirst(&sc->sc_queue)) != NULL) {
2145		/*
2146		 * Abort any pending I/O that wasn't generated by us.
2147		 * Synchronization requests and requests destined for individual
2148		 * mirror components can be destroyed immediately.
2149		 */
2150		if (bp->bio_to == sc->sc_provider &&
2151		    bp->bio_from->geom != sc->sc_sync.ds_geom) {
2152			g_io_deliver(bp, ENXIO);
2153		} else {
2154			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
2155				free(bp->bio_data, M_MIRROR);
2156			g_destroy_bio(bp);
2157		}
2158	}
2159	mtx_unlock(&sc->sc_queue_mtx);
2160	g_wither_provider(sc->sc_provider, ENXIO);
2161	sc->sc_provider = NULL;
2162	G_MIRROR_DEBUG(0, "Device %s: provider destroyed.", sc->sc_name);
2163	g_topology_unlock();
2164	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2165		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2166			g_mirror_sync_stop(disk, 1);
2167	}
2168}
2169
2170static void
2171g_mirror_go(void *arg)
2172{
2173	struct g_mirror_softc *sc;
2174
2175	sc = arg;
2176	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
2177	g_mirror_event_send(sc, 0,
2178	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
2179}
2180
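/*
 * Choose the initial state for a disk by comparing its syncid with
 * the device's: an equal syncid means the disk is ACTIVE (or
 * SYNCHRONIZING/STALE if flagged so), a smaller syncid means the
 * disk missed writes and must be rebuilt from offset zero, and a
 * bigger syncid means the running mirror itself is stale, so the
 * disk is refused and destroyed rather than connected.
 */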
2181static u_int
2182g_mirror_determine_state(struct g_mirror_disk *disk)
2183{
2184	struct g_mirror_softc *sc;
2185	u_int state;
2186
2187	sc = disk->d_softc;
2188	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
2189		if ((disk->d_flags &
2190		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
2191			/* Disk does not need synchronization. */
2192			state = G_MIRROR_DISK_STATE_ACTIVE;
2193		} else {
2194			if ((sc->sc_flags &
2195			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2196			    (disk->d_flags &
2197			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2198				/*
2199				 * We can start synchronization from
2200				 * the stored offset.
2201				 */
2202				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2203			} else {
2204				state = G_MIRROR_DISK_STATE_STALE;
2205			}
2206		}
2207	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
2208		/*
2209		 * Reset all synchronization data for this disk: even if it
2210		 * was synchronized, it was synchronized against disks with
2211		 * a different syncid.
2212		 */
2213		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2214		disk->d_sync.ds_offset = 0;
2215		disk->d_sync.ds_offset_done = 0;
2216		disk->d_sync.ds_syncid = sc->sc_syncid;
2217		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2218		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2219			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2220		} else {
2221			state = G_MIRROR_DISK_STATE_STALE;
2222		}
2223	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
2224		/*
2225		 * Not good, NOT GOOD!
2226		 * It means that the mirror was started on stale disks
2227		 * and a fresher disk has just arrived.
2228		 * If there were any writes, the mirror is broken, sorry.
2229		 * The best choice here is not to touch this disk and to
2230		 * inform the user loudly.
2231		 */
2232		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
2233		    "disk (%s) arrived! It will not be connected to the "
2234		    "running device.", sc->sc_name,
2235		    g_mirror_get_diskname(disk));
2236		g_mirror_destroy_disk(disk);
2237		state = G_MIRROR_DISK_STATE_NONE;
2238		/* Return immediately, because disk was destroyed. */
2239		return (state);
2240	}
2241	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
2242	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
2243	return (state);
2244}
2245
2246/*
2247 * Update device state.
2248 */
2249static void
2250g_mirror_update_device(struct g_mirror_softc *sc, bool force)
2251{
2252	struct g_mirror_disk *disk;
2253	u_int state;
2254
2255	sx_assert(&sc->sc_lock, SX_XLOCKED);
2256
2257	switch (sc->sc_state) {
2258	case G_MIRROR_DEVICE_STATE_STARTING:
2259	    {
2260		struct g_mirror_disk *pdisk, *tdisk;
2261		u_int dirty, ndisks, genid, syncid;
2262
2263		KASSERT(sc->sc_provider == NULL,
2264		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
2265		/*
2266		 * Are we ready? We are, if all disks are connected or
2267		 * if we have any disks and 'force' is true.
2268		 */
2269		ndisks = g_mirror_ndisks(sc, -1);
2270		if (sc->sc_ndisks == ndisks || (force && ndisks > 0)) {
2271			;
2272		} else if (ndisks == 0) {
2273			/*
2274			 * All disks went away during the starting phase,
2275			 * so destroy the device.
2276			 */
2277			callout_drain(&sc->sc_callout);
2278			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2279			G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
2280			    sc->sc_rootmount);
2281			root_mount_rel(sc->sc_rootmount);
2282			sc->sc_rootmount = NULL;
2283			return;
2284		} else {
2285			return;
2286		}
2287
2288		/*
2289		 * Activate all disks with the biggest syncid.
2290		 */
2291		if (force) {
2292			/*
2293			 * If 'force' is true, we have been called due to a
2294			 * timeout, so don't bother canceling the timeout.
2295			 */
2296			ndisks = 0;
2297			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2298				if ((disk->d_flags &
2299				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
2300					ndisks++;
2301				}
2302			}
2303			if (ndisks == 0) {
2304				/* No valid disks found, destroy device. */
2305				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2306				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2307				    __LINE__, sc->sc_rootmount);
2308				root_mount_rel(sc->sc_rootmount);
2309				sc->sc_rootmount = NULL;
2310				return;
2311			}
2312		} else {
2313			/* Cancel timeout. */
2314			callout_drain(&sc->sc_callout);
2315		}
2316
2317		/*
2318		 * Find the biggest genid.
2319		 */
2320		genid = 0;
2321		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2322			if (disk->d_genid > genid)
2323				genid = disk->d_genid;
2324		}
2325		sc->sc_genid = genid;
2326		/*
2327		 * Remove all disks without the biggest genid.
2328		 */
2329		LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
2330			if (disk->d_genid < genid) {
2331				G_MIRROR_DEBUG(0,
2332				    "Component %s (device %s) broken, skipping.",
2333				    g_mirror_get_diskname(disk), sc->sc_name);
2334				g_mirror_destroy_disk(disk);
2335			}
2336		}
2337
2338		/*
2339		 * Find the biggest syncid.
2340		 */
2341		syncid = 0;
2342		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2343			if (disk->d_sync.ds_syncid > syncid)
2344				syncid = disk->d_sync.ds_syncid;
2345		}
2346
2347		/*
2348		 * Here we need to look for dirty disks: if all disks with
2349		 * the biggest syncid are dirty, we have to choose the one
2350		 * with the biggest priority and rebuild the rest.
2351		 *
2352		 * Find the number of dirty disks with the biggest syncid.
2353		 * Find the number of disks with the biggest syncid.
2354		 * While here, also find the disk with the biggest
2355		 * priority.
2356		 */
2357		dirty = ndisks = 0;
2358		pdisk = NULL;
2359		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2360			if (disk->d_sync.ds_syncid != syncid)
2361				continue;
2362			if ((disk->d_flags &
2363			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2364				continue;
2365			}
2366			ndisks++;
2367			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2368				dirty++;
2369				if (pdisk == NULL ||
2370				    pdisk->d_priority < disk->d_priority) {
2371					pdisk = disk;
2372				}
2373			}
2374		}
2375		if (dirty == 0) {
2376			/* No dirty disks at all, great. */
2377		} else if (dirty == ndisks) {
2378			/*
2379			 * Force synchronization for all dirty disks except one
2380			 * with the biggest priority.
2381			 */
2382			KASSERT(pdisk != NULL, ("pdisk == NULL"));
2383			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
2384			    "master disk for synchronization.",
2385			    g_mirror_get_diskname(pdisk), sc->sc_name);
2386			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2387				if (disk->d_sync.ds_syncid != syncid)
2388					continue;
2389				if ((disk->d_flags &
2390				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2391					continue;
2392				}
2393				KASSERT((disk->d_flags &
2394				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
2395				    ("Disk %s isn't marked as dirty.",
2396				    g_mirror_get_diskname(disk)));
2397				/* Skip the disk with the biggest priority. */
2398				if (disk == pdisk)
2399					continue;
2400				disk->d_sync.ds_syncid = 0;
2401			}
2402		} else if (dirty < ndisks) {
2403			/*
2404			 * Force synchronization for all dirty disks.
2405			 * We have some non-dirty disks.
2406			 */
2407			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2408				if (disk->d_sync.ds_syncid != syncid)
2409					continue;
2410				if ((disk->d_flags &
2411				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2412					continue;
2413				}
2414				if ((disk->d_flags &
2415				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
2416					continue;
2417				}
2418				disk->d_sync.ds_syncid = 0;
2419			}
2420		}
2421
2422		/* Reset hint. */
2423		sc->sc_hint = NULL;
2424		sc->sc_syncid = syncid;
2425		if (force) {
2426			/* Remember to bump syncid on first write. */
2427			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2428		}
2429		state = G_MIRROR_DEVICE_STATE_RUNNING;
2430		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
2431		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
2432		    g_mirror_device_state2str(state));
2433		sc->sc_state = state;
2434		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2435			state = g_mirror_determine_state(disk);
2436			g_mirror_event_send(disk, state,
2437			    G_MIRROR_EVENT_DONTWAIT);
2438			if (state == G_MIRROR_DISK_STATE_STALE)
2439				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2440		}
2441		break;
2442	    }
2443	case G_MIRROR_DEVICE_STATE_RUNNING:
2444		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
2445		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2446			/*
2447			 * No active disks or no disks at all,
2448			 * so destroy device.
2449			 */
2450			if (sc->sc_provider != NULL)
2451				g_mirror_destroy_provider(sc);
2452			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2453			break;
2454		} else if (g_mirror_ndisks(sc,
2455		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
2456		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2457			/*
2458			 * We have active disks, launch provider if it doesn't
2459			 * exist.
2460			 */
2461			if (sc->sc_provider == NULL)
2462				g_mirror_launch_provider(sc);
2463			if (sc->sc_rootmount != NULL) {
2464				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2465				    __LINE__, sc->sc_rootmount);
2466				root_mount_rel(sc->sc_rootmount);
2467				sc->sc_rootmount = NULL;
2468			}
2469		}
2470		/*
2471		 * Genid should be bumped immediately, so do it here.
2472		 */
2473		if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID) != 0) {
2474			sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID;
2475			g_mirror_bump_genid(sc);
2476		}
2477		break;
2478	default:
2479		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
2480		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
2481		break;
2482	}
2483}
2484
2485/*
2486 * Update disk state and device state if needed.
2487 */
2488#define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
2489	"Disk %s state changed from %s to %s (device %s).",		\
2490	g_mirror_get_diskname(disk),					\
2491	g_mirror_disk_state2str(disk->d_state),				\
2492	g_mirror_disk_state2str(state), sc->sc_name)
2493static int
2494g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
2495{
2496	struct g_mirror_softc *sc;
2497
2498	sc = disk->d_softc;
2499	sx_assert(&sc->sc_lock, SX_XLOCKED);
2500
2501again:
2502	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
2503	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
2504	    g_mirror_disk_state2str(state));
2505	switch (state) {
2506	case G_MIRROR_DISK_STATE_NEW:
2507		/*
2508		 * Possible scenarios:
2509		 * 1. A new disk arrives.
2510		 */
2511		/* Previous state should be NONE. */
2512		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
2513		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2514		    g_mirror_disk_state2str(disk->d_state)));
2515		DISK_STATE_CHANGED();
2516
2517		disk->d_state = state;
2518		if (LIST_EMPTY(&sc->sc_disks))
2519			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
2520		else {
2521			struct g_mirror_disk *dp;
2522
2523			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
2524				if (disk->d_priority >= dp->d_priority) {
2525					LIST_INSERT_BEFORE(dp, disk, d_next);
2526					dp = NULL;
2527					break;
2528				}
2529				if (LIST_NEXT(dp, d_next) == NULL)
2530					break;
2531			}
2532			if (dp != NULL)
2533				LIST_INSERT_AFTER(dp, disk, d_next);
2534		}
2535		G_MIRROR_DEBUG(1, "Device %s: provider %s detected.",
2536		    sc->sc_name, g_mirror_get_diskname(disk));
2537		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2538			break;
2539		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2540		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2541		    g_mirror_device_state2str(sc->sc_state),
2542		    g_mirror_get_diskname(disk),
2543		    g_mirror_disk_state2str(disk->d_state)));
2544		state = g_mirror_determine_state(disk);
2545		if (state != G_MIRROR_DISK_STATE_NONE)
2546			goto again;
2547		break;
2548	case G_MIRROR_DISK_STATE_ACTIVE:
2549		/*
2550		 * Possible scenarios:
2551		 * 1. New disk does not need synchronization.
2552		 * 2. Synchronization process finished successfully.
2553		 */
2554		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2555		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2556		    g_mirror_device_state2str(sc->sc_state),
2557		    g_mirror_get_diskname(disk),
2558		    g_mirror_disk_state2str(disk->d_state)));
2559		/* Previous state should be NEW or SYNCHRONIZING. */
2560		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
2561		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2562		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2563		    g_mirror_disk_state2str(disk->d_state)));
2564		DISK_STATE_CHANGED();
2565
2566		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2567			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2568			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
2569			g_mirror_sync_stop(disk, 0);
2570		}
2571		disk->d_state = state;
2572		disk->d_sync.ds_offset = 0;
2573		disk->d_sync.ds_offset_done = 0;
2574		g_mirror_update_idle(sc, disk);
2575		g_mirror_update_metadata(disk);
2576		G_MIRROR_DEBUG(1, "Device %s: provider %s activated.",
2577		    sc->sc_name, g_mirror_get_diskname(disk));
2578		break;
2579	case G_MIRROR_DISK_STATE_STALE:
2580		/*
2581		 * Possible scenarios:
2582		 * 1. Stale disk was connected.
2583		 */
2584		/* Previous state should be NEW. */
2585		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2586		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2587		    g_mirror_disk_state2str(disk->d_state)));
2588		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2589		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2590		    g_mirror_device_state2str(sc->sc_state),
2591		    g_mirror_get_diskname(disk),
2592		    g_mirror_disk_state2str(disk->d_state)));
2593		/*
2594		 * STALE state is only possible if device is marked
2595		 * NOAUTOSYNC.
2596		 */
2597		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
2598		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2599		    g_mirror_device_state2str(sc->sc_state),
2600		    g_mirror_get_diskname(disk),
2601		    g_mirror_disk_state2str(disk->d_state)));
2602		DISK_STATE_CHANGED();
2603
2604		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2605		disk->d_state = state;
2606		g_mirror_update_metadata(disk);
2607		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
2608		    sc->sc_name, g_mirror_get_diskname(disk));
2609		break;
2610	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
2611		/*
2612		 * Possible scenarios:
2613		 * 1. Disk which needs synchronization was connected.
2614		 */
2615		/* Previous state should be NEW. */
2616		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2617		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2618		    g_mirror_disk_state2str(disk->d_state)));
2619		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2620		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2621		    g_mirror_device_state2str(sc->sc_state),
2622		    g_mirror_get_diskname(disk),
2623		    g_mirror_disk_state2str(disk->d_state)));
2624		DISK_STATE_CHANGED();
2625
2626		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2627			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2628		disk->d_state = state;
2629		if (sc->sc_provider != NULL) {
2630			g_mirror_sync_start(disk);
2631			g_mirror_update_metadata(disk);
2632		}
2633		break;
2634	case G_MIRROR_DISK_STATE_DISCONNECTED:
2635		/*
2636		 * Possible scenarios:
2637		 * 1. Device wasn't running yet, but disk disappeared.
2638		 * 2. Disk was active and disappeared.
2639		 * 3. Disk disappeared during synchronization process.
2640		 */
2641		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
2642			/*
2643			 * Previous state should be ACTIVE, STALE or
2644			 * SYNCHRONIZING.
2645			 */
2646			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
2647			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
2648			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2649			    ("Wrong disk state (%s, %s).",
2650			    g_mirror_get_diskname(disk),
2651			    g_mirror_disk_state2str(disk->d_state)));
2652		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
2653			/* Previous state should be NEW. */
2654			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2655			    ("Wrong disk state (%s, %s).",
2656			    g_mirror_get_diskname(disk),
2657			    g_mirror_disk_state2str(disk->d_state)));
2658			/*
2659			 * Reset bumping syncid if disk disappeared in STARTING
2660			 * state.
2661			 */
2662			if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0)
2663				sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
2664#ifdef	INVARIANTS
2665		} else {
2666			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
2667			    sc->sc_name,
2668			    g_mirror_device_state2str(sc->sc_state),
2669			    g_mirror_get_diskname(disk),
2670			    g_mirror_disk_state2str(disk->d_state)));
2671#endif
2672		}
2673		DISK_STATE_CHANGED();
2674		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
2675		    sc->sc_name, g_mirror_get_diskname(disk));
2676
2677		g_mirror_destroy_disk(disk);
2678		break;
2679	case G_MIRROR_DISK_STATE_DESTROY:
2680	    {
2681		int error;
2682
2683		error = g_mirror_clear_metadata(disk);
2684		if (error != 0) {
2685			G_MIRROR_DEBUG(0,
2686			    "Device %s: failed to clear metadata on %s: %d.",
2687			    sc->sc_name, g_mirror_get_diskname(disk), error);
2688			break;
2689		}
2690		DISK_STATE_CHANGED();
2691		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
2692		    sc->sc_name, g_mirror_get_diskname(disk));
2693
2694		g_mirror_destroy_disk(disk);
2695		sc->sc_ndisks--;
2696		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2697			g_mirror_update_metadata(disk);
2698		}
2699		break;
2700	    }
2701	default:
2702		KASSERT(1 == 0, ("Unknown state (%u).", state));
2703		break;
2704	}
2705	return (0);
2706}
2707#undef	DISK_STATE_CHANGED
2708
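/*
 * Read and decode the on-disk metadata, which lives in the last
 * sector of the component (offset mediasize - sectorsize).  The
 * magic string, the metadata version and the MD5 hash are all
 * verified before the metadata is accepted.
 */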
2709int
2710g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
2711{
2712	struct g_provider *pp;
2713	u_char *buf;
2714	int error;
2715
2716	g_topology_assert();
2717
2718	error = g_access(cp, 1, 0, 0);
2719	if (error != 0)
2720		return (error);
2721	pp = cp->provider;
2722	g_topology_unlock();
2723	/* Metadata is stored in the last sector. */
2724	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
2725	    &error);
2726	g_topology_lock();
2727	g_access(cp, -1, 0, 0);
2728	if (buf == NULL) {
2729		G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).",
2730		    cp->provider->name, error);
2731		return (error);
2732	}
2733
2734	/* Decode metadata. */
2735	error = mirror_metadata_decode(buf, md);
2736	g_free(buf);
2737	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
2738		return (EINVAL);
2739	if (md->md_version > G_MIRROR_VERSION) {
2740		G_MIRROR_DEBUG(0,
2741		    "Kernel module is too old to handle metadata from %s.",
2742		    cp->provider->name);
2743		return (EINVAL);
2744	}
2745	if (error != 0) {
2746		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
2747		    cp->provider->name);
2748		return (error);
2749	}
2750
2751	return (0);
2752}
2753
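/*
 * Sanity-check metadata read from a component against the device it
 * is about to join: the disk id must be unique, and the component
 * count, slice size, balance algorithm, sector size and flag masks
 * must all match what the device was created with.
 */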
2754static int
2755g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2756    struct g_mirror_metadata *md)
2757{
2758
2759	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2760		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2761		    pp->name, md->md_did);
2762		return (EEXIST);
2763	}
2764	if (md->md_all != sc->sc_ndisks) {
2765		G_MIRROR_DEBUG(1,
2766		    "Invalid '%s' field on disk %s (device %s), skipping.",
2767		    "md_all", pp->name, sc->sc_name);
2768		return (EINVAL);
2769	}
2770	if (md->md_slice != sc->sc_slice) {
2771		G_MIRROR_DEBUG(1,
2772		    "Invalid '%s' field on disk %s (device %s), skipping.",
2773		    "md_slice", pp->name, sc->sc_name);
2774		return (EINVAL);
2775	}
2776	if (md->md_balance != sc->sc_balance) {
2777		G_MIRROR_DEBUG(1,
2778		    "Invalid '%s' field on disk %s (device %s), skipping.",
2779		    "md_balance", pp->name, sc->sc_name);
2780		return (EINVAL);
2781	}
2782#if 0
2783	if (md->md_mediasize != sc->sc_mediasize) {
2784		G_MIRROR_DEBUG(1,
2785		    "Invalid '%s' field on disk %s (device %s), skipping.",
2786		    "md_mediasize", pp->name, sc->sc_name);
2787		return (EINVAL);
2788	}
2789#endif
2790	if (sc->sc_mediasize > pp->mediasize) {
2791		G_MIRROR_DEBUG(1,
2792		    "Invalid size of disk %s (device %s), skipping.", pp->name,
2793		    sc->sc_name);
2794		return (EINVAL);
2795	}
2796	if (md->md_sectorsize != sc->sc_sectorsize) {
2797		G_MIRROR_DEBUG(1,
2798		    "Invalid '%s' field on disk %s (device %s), skipping.",
2799		    "md_sectorsize", pp->name, sc->sc_name);
2800		return (EINVAL);
2801	}
2802	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2803		G_MIRROR_DEBUG(1,
2804		    "Invalid sector size of disk %s (device %s), skipping.",
2805		    pp->name, sc->sc_name);
2806		return (EINVAL);
2807	}
2808	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
2809		G_MIRROR_DEBUG(1,
2810		    "Invalid device flags on disk %s (device %s), skipping.",
2811		    pp->name, sc->sc_name);
2812		return (EINVAL);
2813	}
2814	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
2815		G_MIRROR_DEBUG(1,
2816		    "Invalid disk flags on disk %s (device %s), skipping.",
2817		    pp->name, sc->sc_name);
2818		return (EINVAL);
2819	}
2820	return (0);
2821}
2822
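/*
 * Attach a tasted component to the device.  Components carrying a
 * stale generation id are refused as broken; components carrying an
 * older metadata version get their on-disk metadata rewritten in
 * the current format.
 */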
2823int
2824g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
2825    struct g_mirror_metadata *md)
2826{
2827	struct g_mirror_disk *disk;
2828	int error;
2829
2830	g_topology_assert_not();
2831	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
2832
2833	error = g_mirror_check_metadata(sc, pp, md);
2834	if (error != 0)
2835		return (error);
2836	if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING &&
2837	    md->md_genid < sc->sc_genid) {
2838		G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.",
2839		    pp->name, sc->sc_name);
2840		return (EINVAL);
2841	}
2842	disk = g_mirror_init_disk(sc, pp, md, &error);
2843	if (disk == NULL)
2844		return (error);
2845	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
2846	    G_MIRROR_EVENT_WAIT);
2847	if (error != 0)
2848		return (error);
2849	if (md->md_version < G_MIRROR_VERSION) {
2850		G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).",
2851		    pp->name, md->md_version, G_MIRROR_VERSION);
2852		g_mirror_update_metadata(disk);
2853	}
2854	return (0);
2855}
2856
2857static void
2858g_mirror_destroy_delayed(void *arg, int flag)
2859{
2860	struct g_mirror_softc *sc;
2861	int error;
2862
2863	if (flag == EV_CANCEL) {
2864		G_MIRROR_DEBUG(1, "Destruction canceled.");
2865		return;
2866	}
2867	sc = arg;
2868	g_topology_unlock();
2869	sx_xlock(&sc->sc_lock);
2870	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) == 0,
2871	    ("DESTROY flag set on %s.", sc->sc_name));
2872	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROYING) != 0,
2873	    ("DESTROYING flag not set on %s.", sc->sc_name));
2874	G_MIRROR_DEBUG(1, "Destroying %s (delayed).", sc->sc_name);
2875	error = g_mirror_destroy(sc, G_MIRROR_DESTROY_SOFT);
2876	if (error != 0) {
2877		G_MIRROR_DEBUG(0, "Cannot destroy %s (error=%d).",
2878		    sc->sc_name, error);
2879		sx_xunlock(&sc->sc_lock);
2880	}
2881	g_topology_lock();
2882}
2883
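/*
 * GEOM access method for the mirror provider.  Besides tracking the
 * open count, this is where the device is marked idle on last write
 * close and where a destroy scheduled with G_MIRROR_DESTROY_DELAYED
 * is finally carried out on last close.
 */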
2884static int
2885g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
2886{
2887	struct g_mirror_softc *sc;
2888	int error = 0;
2889
2890	g_topology_assert();
2891	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
2892	    acw, ace);
2893
2894	sc = pp->private;
2895	KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name));
2896
2897	g_topology_unlock();
2898	sx_xlock(&sc->sc_lock);
2899	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0 ||
2900	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROYING) != 0 ||
2901	    LIST_EMPTY(&sc->sc_disks)) {
2902		if (acr > 0 || acw > 0 || ace > 0)
2903			error = ENXIO;
2904		goto end;
2905	}
2906	sc->sc_provider_open += acr + acw + ace;
2907	if (pp->acw + acw == 0)
2908		g_mirror_idle(sc, 0);
2909	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROYING) != 0 &&
2910	    sc->sc_provider_open == 0)
2911		g_post_event(g_mirror_destroy_delayed, sc, M_WAITOK, sc, NULL);
2912end:
2913	sx_xunlock(&sc->sc_lock);
2914	g_topology_lock();
2915	return (error);
2916}
2917
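/*
 * Create a new mirror device from tasted metadata.  Two geoms are
 * created: the action geom, which will own the mirror/<name>
 * provider, and a <name>.sync geom acting as the source of
 * synchronization requests.  A dedicated worker thread services the
 * event and I/O queues, and a callout forces the device to start
 * after g_mirror_timeout seconds even if not all components have
 * shown up yet.
 */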
2918static struct g_geom *
2919g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md)
2920{
2921	struct g_mirror_softc *sc;
2922	struct g_geom *gp;
2923	int error, timeout;
2924
2925	g_topology_assert();
2926	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
2927	    md->md_mid);
2928
2929	/* At least one disk is required. */
2930	if (md->md_all < 1)
2931		return (NULL);
2932	/*
2933	 * Action geom.
2934	 */
2935	gp = g_new_geomf(mp, "%s", md->md_name);
2936	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
2937	gp->start = g_mirror_start;
2938	gp->orphan = g_mirror_orphan;
2939	gp->access = g_mirror_access;
2940	gp->dumpconf = g_mirror_dumpconf;
2941
2942	sc->sc_id = md->md_mid;
2943	sc->sc_slice = md->md_slice;
2944	sc->sc_balance = md->md_balance;
2945	sc->sc_mediasize = md->md_mediasize;
2946	sc->sc_sectorsize = md->md_sectorsize;
2947	sc->sc_ndisks = md->md_all;
2948	sc->sc_flags = md->md_mflags;
2949	sc->sc_bump_id = 0;
2950	sc->sc_idle = 1;
2951	sc->sc_last_write = time_uptime;
2952	sc->sc_writes = 0;
2953	sc->sc_refcnt = 1;
2954	sx_init(&sc->sc_lock, "gmirror:lock");
2955	bioq_init(&sc->sc_queue);
2956	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
2957	bioq_init(&sc->sc_regular_delayed);
2958	bioq_init(&sc->sc_inflight);
2959	bioq_init(&sc->sc_sync_delayed);
2960	LIST_INIT(&sc->sc_disks);
2961	TAILQ_INIT(&sc->sc_events);
2962	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
2963	callout_init(&sc->sc_callout, 1);
2964	mtx_init(&sc->sc_done_mtx, "gmirror:done", NULL, MTX_DEF);
2965	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
2966	gp->softc = sc;
2967	sc->sc_geom = gp;
2968	sc->sc_provider = NULL;
2969	sc->sc_provider_open = 0;
2970	/*
2971	 * Synchronization geom.
2972	 */
2973	gp = g_new_geomf(mp, "%s.sync", md->md_name);
2974	gp->softc = sc;
2975	gp->orphan = g_mirror_orphan;
2976	sc->sc_sync.ds_geom = gp;
2977	sc->sc_sync.ds_ndisks = 0;
2978	error = kproc_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
2979	    "g_mirror %s", md->md_name);
2980	if (error != 0) {
2981		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
2982		    sc->sc_name);
2983		g_destroy_geom(sc->sc_sync.ds_geom);
2984		g_destroy_geom(sc->sc_geom);
2985		g_mirror_free_device(sc);
2986		return (NULL);
2987	}
2988
2989	G_MIRROR_DEBUG(1, "Device %s created (%u components, id=%u).",
2990	    sc->sc_name, sc->sc_ndisks, sc->sc_id);
2991
2992	sc->sc_rootmount = root_mount_hold("GMIRROR");
2993	G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount);
2994	/*
2995	 * Arm the startup timeout.
2996	 */
2997	timeout = g_mirror_timeout * hz;
2998	callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
2999	return (sc->sc_geom);
3000}
3001
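/*
 * Destroy the device.  'how' selects the behaviour while the
 * provider is still open: SOFT fails with EBUSY, DELAYED stops
 * synchronization and defers destruction until the last close, and
 * HARD proceeds regardless.  The worker thread is woken up and
 * waited for before the device structures are finally torn down.
 */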
3002int
3003g_mirror_destroy(struct g_mirror_softc *sc, int how)
3004{
3005	struct g_mirror_disk *disk;
3006
3007	g_topology_assert_not();
3008	sx_assert(&sc->sc_lock, SX_XLOCKED);
3009
3010	if (sc->sc_provider_open != 0 || SCHEDULER_STOPPED()) {
3011		switch (how) {
3012		case G_MIRROR_DESTROY_SOFT:
3013			G_MIRROR_DEBUG(1,
3014			    "Device %s is still open (%d).", sc->sc_name,
3015			    sc->sc_provider_open);
3016			return (EBUSY);
3017		case G_MIRROR_DESTROY_DELAYED:
3018			G_MIRROR_DEBUG(1,
3019			    "Device %s will be destroyed on last close.",
3020			    sc->sc_name);
3021			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
3022				if (disk->d_state ==
3023				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3024					g_mirror_sync_stop(disk, 1);
3025				}
3026			}
3027			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROYING;
3028			return (EBUSY);
3029		case G_MIRROR_DESTROY_HARD:
3030			G_MIRROR_DEBUG(1, "Device %s is still open, so it "
3031			    "can't be definitely removed.", sc->sc_name);
3032		}
3033	}
3034
3035	g_topology_lock();
3036	if (sc->sc_geom->softc == NULL) {
3037		g_topology_unlock();
3038		return (0);
3039	}
3040	sc->sc_geom->softc = NULL;
3041	sc->sc_sync.ds_geom->softc = NULL;
3042	g_topology_unlock();
3043
3044	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
3045	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_WAIT;
3046	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
3047	sx_xunlock(&sc->sc_lock);
3048	mtx_lock(&sc->sc_queue_mtx);
3049	wakeup(sc);
3050	mtx_unlock(&sc->sc_queue_mtx);
3051	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
3052	while (sc->sc_worker != NULL)
3053		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
3054	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
3055	sx_xlock(&sc->sc_lock);
3056	g_mirror_destroy_device(sc);
3057	return (0);
3058}
3059
3060static void
3061g_mirror_taste_orphan(struct g_consumer *cp)
3062{
3063
3064	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
3065	    cp->provider->name));
3066}
3067
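/*
 * Taste a provider: read its metadata through a throw-away geom and
 * consumer, then either attach the component to an already existing
 * device with a matching name and id, or create the device first.
 */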
3068static struct g_geom *
3069g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
3070{
3071	struct g_mirror_metadata md;
3072	struct g_mirror_softc *sc;
3073	struct g_consumer *cp;
3074	struct g_geom *gp;
3075	int error;
3076
3077	g_topology_assert();
3078	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
3079	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
3080
3081	gp = g_new_geomf(mp, "mirror:taste");
3082	/*
3083	 * This orphan function should never be called.
3084	 */
3085	gp->orphan = g_mirror_taste_orphan;
3086	cp = g_new_consumer(gp);
3087	g_attach(cp, pp);
3088	error = g_mirror_read_metadata(cp, &md);
3089	g_detach(cp);
3090	g_destroy_consumer(cp);
3091	g_destroy_geom(gp);
3092	if (error != 0)
3093		return (NULL);
3094	gp = NULL;
3095
3096	if (md.md_provider[0] != '\0' &&
3097	    !g_compare_names(md.md_provider, pp->name))
3098		return (NULL);
3099	if (md.md_provsize != 0 && md.md_provsize != pp->mediasize)
3100		return (NULL);
3101	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
3102		G_MIRROR_DEBUG(0,
3103		    "Device %s: provider %s marked as inactive, skipping.",
3104		    md.md_name, pp->name);
3105		return (NULL);
3106	}
3107	if (g_mirror_debug >= 2)
3108		mirror_metadata_dump(&md);
3109
3110	/*
3111	 * Let's check if device already exists.
3112	 */
3113	sc = NULL;
3114	LIST_FOREACH(gp, &mp->geom, geom) {
3115		sc = gp->softc;
3116		if (sc == NULL)
3117			continue;
3118		if (sc->sc_sync.ds_geom == gp)
3119			continue;
3120		if (strcmp(md.md_name, sc->sc_name) != 0)
3121			continue;
3122		if (md.md_mid != sc->sc_id) {
3123			G_MIRROR_DEBUG(0, "Device %s already configured.",
3124			    sc->sc_name);
3125			return (NULL);
3126		}
3127		break;
3128	}
3129	if (gp == NULL) {
3130		gp = g_mirror_create(mp, &md);
3131		if (gp == NULL) {
3132			G_MIRROR_DEBUG(0, "Cannot create device %s.",
3133			    md.md_name);
3134			return (NULL);
3135		}
3136		sc = gp->softc;
3137	}
3138	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
3139	g_topology_unlock();
3140	sx_xlock(&sc->sc_lock);
3141	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_TASTING;
3142	error = g_mirror_add_disk(sc, pp, &md);
3143	if (error != 0) {
3144		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
3145		    pp->name, gp->name, error);
3146		if (LIST_EMPTY(&sc->sc_disks)) {
3147			g_cancel_event(sc);
3148			g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3149			g_topology_lock();
3150			return (NULL);
3151		}
3152		gp = NULL;
3153	}
3154	sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_TASTING;
3155	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
3156		g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3157		g_topology_lock();
3158		return (NULL);
3159	}
3160	sx_xunlock(&sc->sc_lock);
3161	g_topology_lock();
3162	return (gp);
3163}
3164
3165static void
3166g_mirror_resize(struct g_consumer *cp)
3167{
3168	struct g_mirror_disk *disk;
3169
3170	g_topology_assert();
3171	g_trace(G_T_TOPOLOGY, "%s(%s)", __func__, cp->provider->name);
3172
3173	disk = cp->private;
3174	if (disk == NULL)
3175		return;
3176	g_topology_unlock();
3177	g_mirror_update_metadata(disk);
3178	g_topology_lock();
3179}
3180
3181static int
3182g_mirror_destroy_geom(struct gctl_req *req __unused,
3183    struct g_class *mp __unused, struct g_geom *gp)
3184{
3185	struct g_mirror_softc *sc;
3186	int error;
3187
3188	g_topology_unlock();
3189	sc = gp->softc;
3190	sx_xlock(&sc->sc_lock);
3191	g_cancel_event(sc);
3192	error = g_mirror_destroy(gp->softc, G_MIRROR_DESTROY_SOFT);
3193	if (error != 0)
3194		sx_xunlock(&sc->sc_lock);
3195	g_topology_lock();
3196	return (error);
3197}
3198
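/*
 * Export device and component status as XML for the GEOM
 * configuration tree, which userland tools such as gmirror(8)
 * parse to present mirror status.
 */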
3199static void
3200g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
3201    struct g_consumer *cp, struct g_provider *pp)
3202{
3203	struct g_mirror_softc *sc;
3204
3205	g_topology_assert();
3206
3207	sc = gp->softc;
3208	if (sc == NULL)
3209		return;
3210	/* Skip synchronization geom. */
3211	if (gp == sc->sc_sync.ds_geom)
3212		return;
3213	if (pp != NULL) {
3214		/* Nothing here. */
3215	} else if (cp != NULL) {
3216		struct g_mirror_disk *disk;
3217
3218		disk = cp->private;
3219		if (disk == NULL)
3220			return;
3221		g_topology_unlock();
3222		sx_xlock(&sc->sc_lock);
3223		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
3224		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3225			sbuf_printf(sb, "%s<Synchronized>", indent);
3226			if (disk->d_sync.ds_offset == 0)
3227				sbuf_printf(sb, "0%%");
3228			else {
3229				sbuf_printf(sb, "%u%%",
3230				    (u_int)((disk->d_sync.ds_offset * 100) /
3231				    sc->sc_provider->mediasize));
3232			}
3233			sbuf_printf(sb, "</Synchronized>\n");
3234			if (disk->d_sync.ds_offset > 0) {
3235				sbuf_printf(sb, "%s<BytesSynced>%jd"
3236				    "</BytesSynced>\n", indent,
3237				    (intmax_t)disk->d_sync.ds_offset);
3238			}
3239		}
3240		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
3241		    disk->d_sync.ds_syncid);
3242		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent,
3243		    disk->d_genid);
3244		sbuf_printf(sb, "%s<Flags>", indent);
3245		if (disk->d_flags == 0)
3246			sbuf_printf(sb, "NONE");
3247		else {
3248			int first = 1;
3249
3250#define	ADD_FLAG(flag, name)	do {					\
3251	if ((disk->d_flags & (flag)) != 0) {				\
3252		if (!first)						\
3253			sbuf_printf(sb, ", ");				\
3254		else							\
3255			first = 0;					\
3256		sbuf_printf(sb, name);					\
3257	}								\
3258} while (0)
3259			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
3260			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
3261			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
3262			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
3263			    "SYNCHRONIZING");
3264			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
3265			ADD_FLAG(G_MIRROR_DISK_FLAG_BROKEN, "BROKEN");
3266#undef	ADD_FLAG
3267		}
3268		sbuf_printf(sb, "</Flags>\n");
3269		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
3270		    disk->d_priority);
3271		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
3272		    g_mirror_disk_state2str(disk->d_state));
3273		sx_xunlock(&sc->sc_lock);
3274		g_topology_lock();
3275	} else {
3276		g_topology_unlock();
3277		sx_xlock(&sc->sc_lock);
3278		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
3279		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
3280		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid);
3281		sbuf_printf(sb, "%s<Flags>", indent);
3282		if (sc->sc_flags == 0)
3283			sbuf_printf(sb, "NONE");
3284		else {
3285			int first = 1;
3286
3287#define	ADD_FLAG(flag, name)	do {					\
3288	if ((sc->sc_flags & (flag)) != 0) {				\
3289		if (!first)						\
3290			sbuf_printf(sb, ", ");				\
3291		else							\
3292			first = 0;					\
3293		sbuf_printf(sb, name);					\
3294	}								\
3295} while (0)
3296			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC");
3297			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
3298#undef	ADD_FLAG
3299		}
3300		sbuf_printf(sb, "</Flags>\n");
3301		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
3302		    (u_int)sc->sc_slice);
3303		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
3304		    balance_name(sc->sc_balance));
3305		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
3306		    sc->sc_ndisks);
3307		sbuf_printf(sb, "%s<State>", indent);
3308		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
3309			sbuf_printf(sb, "%s", "STARTING");
3310		else if (sc->sc_ndisks ==
3311		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
3312			sbuf_printf(sb, "%s", "COMPLETE");
3313		else
3314			sbuf_printf(sb, "%s", "DEGRADED");
3315		sbuf_printf(sb, "</State>\n");
3316		sx_xunlock(&sc->sc_lock);
3317		g_topology_lock();
3318	}
3319}
3320
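/*
 * Shutdown hook, run after filesystems have been synced: mark every
 * mirror idle so its components are recorded as clean, then schedule
 * a delayed destroy so that still-open devices shut down cleanly on
 * their last close.
 */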
3321static void
3322g_mirror_shutdown_post_sync(void *arg, int howto)
3323{
3324	struct g_class *mp;
3325	struct g_geom *gp, *gp2;
3326	struct g_mirror_softc *sc;
3327	int error;
3328
3329	mp = arg;
3330	g_topology_lock();
3331	g_mirror_shutdown = 1;
3332	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
3333		if ((sc = gp->softc) == NULL)
3334			continue;
3335		/* Skip synchronization geom. */
3336		if (gp == sc->sc_sync.ds_geom)
3337			continue;
3338		g_topology_unlock();
3339		sx_xlock(&sc->sc_lock);
3340		g_mirror_idle(sc, -1);
3341		g_cancel_event(sc);
3342		error = g_mirror_destroy(sc, G_MIRROR_DESTROY_DELAYED);
3343		if (error != 0)
3344			sx_xunlock(&sc->sc_lock);
3345		g_topology_lock();
3346	}
3347	g_topology_unlock();
3348}
3349
3350static void
3351g_mirror_init(struct g_class *mp)
3352{
3353
3354	g_mirror_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync,
3355	    g_mirror_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST);
3356	if (g_mirror_post_sync == NULL)
3357		G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event.");
3358}
3359
3360static void
3361g_mirror_fini(struct g_class *mp)
3362{
3363
3364	if (g_mirror_post_sync != NULL)
3365		EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_post_sync);
3366}
3367
3368DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
3369