/*-
 * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/geom/mirror/g_mirror.c 327080 2017-12-22 16:14:20Z markj $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fail.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/bio.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/eventhandler.h>
#include <vm/uma.h>
#include <geom/geom.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/sched.h>
#include <geom/mirror/g_mirror.h>

FEATURE(geom_mirror, "GEOM mirroring support");

static MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data");

SYSCTL_DECL(_kern_geom);
static SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0,
    "GEOM_MIRROR stuff");
int g_mirror_debug = 0;
SYSCTL_INT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RWTUN, &g_mirror_debug, 0,
    "Debug level");
static u_int g_mirror_timeout = 4;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RWTUN, &g_mirror_timeout,
    0, "Time to wait on all mirror components");
static u_int g_mirror_idletime = 5;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RWTUN,
    &g_mirror_idletime, 0, "Mark components as clean when idling");
static u_int g_mirror_disconnect_on_failure = 1;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, disconnect_on_failure, CTLFLAG_RWTUN,
    &g_mirror_disconnect_on_failure, 0, "Disconnect component on I/O failure.");
static u_int g_mirror_syncreqs = 2;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_requests, CTLFLAG_RDTUN,
    &g_mirror_syncreqs, 0, "Parallel synchronization I/O requests.");
static u_int g_mirror_sync_period = 5;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_update_period, CTLFLAG_RWTUN,
    &g_mirror_sync_period, 0,
    "Metadata update period during synchronization, in seconds");
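
/*
 * The knobs above live under the kern.geom.mirror sysctl node; for example
 * (illustrative):
 *
 *	sysctl kern.geom.mirror.debug=2
 *
 * Knobs marked CTLFLAG_RWTUN or CTLFLAG_RDTUN may also be set as loader(8)
 * tunables of the same names.
 */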

#define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
} while (0)

static eventhandler_tag g_mirror_post_sync = NULL;
static int g_mirror_shutdown = 0;

static g_ctl_destroy_geom_t g_mirror_destroy_geom;
static g_taste_t g_mirror_taste;
static g_init_t g_mirror_init;
static g_fini_t g_mirror_fini;
static g_provgone_t g_mirror_providergone;
static g_resize_t g_mirror_resize;

struct g_class g_mirror_class = {
	.name = G_MIRROR_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_mirror_config,
	.taste = g_mirror_taste,
	.destroy_geom = g_mirror_destroy_geom,
	.init = g_mirror_init,
	.fini = g_mirror_fini,
	.providergone = g_mirror_providergone,
	.resize = g_mirror_resize
};


static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
static void g_mirror_update_device(struct g_mirror_softc *sc, bool force);
static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
static void g_mirror_register_request(struct bio *bp);
static void g_mirror_sync_release(struct g_mirror_softc *sc);


static const char *
g_mirror_disk_state2str(int state)
{

	switch (state) {
	case G_MIRROR_DISK_STATE_NONE:
		return ("NONE");
	case G_MIRROR_DISK_STATE_NEW:
		return ("NEW");
	case G_MIRROR_DISK_STATE_ACTIVE:
		return ("ACTIVE");
	case G_MIRROR_DISK_STATE_STALE:
		return ("STALE");
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		return ("SYNCHRONIZING");
	case G_MIRROR_DISK_STATE_DISCONNECTED:
		return ("DISCONNECTED");
	case G_MIRROR_DISK_STATE_DESTROY:
		return ("DESTROY");
	default:
		return ("INVALID");
	}
}

static const char *
g_mirror_device_state2str(int state)
{

	switch (state) {
	case G_MIRROR_DEVICE_STATE_STARTING:
		return ("STARTING");
	case G_MIRROR_DEVICE_STATE_RUNNING:
		return ("RUNNING");
	default:
		return ("INVALID");
	}
}

static const char *
g_mirror_get_diskname(struct g_mirror_disk *disk)
{

	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
		return ("[unknown]");
	return (disk->d_name);
}

/*
 * --- Events handling functions ---
 * Events in geom_mirror are used to maintain the status of disks and the
 * device from a single thread, which simplifies locking.
 */
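/*
 * An illustrative example: g_mirror_orphan() (below) reports a vanished
 * component by queueing an event with g_mirror_event_send(disk,
 * G_MIRROR_DISK_STATE_DISCONNECTED, G_MIRROR_EVENT_DONTWAIT) and returning
 * immediately; the worker thread later performs the actual state transition.
 */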
static void
g_mirror_event_free(struct g_mirror_event *ep)
{

	free(ep, M_MIRROR);
}

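/*
 * Queue a state-change event for the worker thread.  With
 * G_MIRROR_EVENT_DEVICE set in flags, arg is the softc; otherwise it is the
 * disk.  Unless G_MIRROR_EVENT_DONTWAIT is set, drop sc_lock and sleep until
 * the worker marks the event done, then return the event's error.
 */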
int
g_mirror_event_send(void *arg, int state, int flags)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	int error;

	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
		disk = NULL;
		sc = arg;
	} else {
		disk = arg;
		sc = disk->d_softc;
	}
	ep->e_disk = disk;
	ep->e_state = state;
	ep->e_flags = flags;
	ep->e_error = 0;
	mtx_lock(&sc->sc_events_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	mtx_lock(&sc->sc_queue_mtx);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
		return (0);
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
	sx_xunlock(&sc->sc_lock);
	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
		mtx_lock(&sc->sc_events_mtx);
		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
		    hz * 5);
	}
	error = ep->e_error;
	g_mirror_event_free(ep);
	sx_xlock(&sc->sc_lock);
	return (error);
}

static struct g_mirror_event *
g_mirror_event_first(struct g_mirror_softc *sc)
{
	struct g_mirror_event *ep;

	mtx_lock(&sc->sc_events_mtx);
	ep = TAILQ_FIRST(&sc->sc_events);
	mtx_unlock(&sc->sc_events_mtx);
	return (ep);
}

static void
g_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep)
{

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_REMOVE(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
}

static void
g_mirror_event_cancel(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_mirror_event *ep, *tmpep;

	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
			continue;
		if (ep->e_disk != disk)
			continue;
		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			wakeup(ep);
		}
	}
	mtx_unlock(&sc->sc_events_mtx);
}

/*
 * Return the number of disks in the given state.
 * If state is equal to -1, count all connected disks.
 */
u_int
g_mirror_ndisks(struct g_mirror_softc *sc, int state)
{
	struct g_mirror_disk *disk;
	u_int n = 0;

	sx_assert(&sc->sc_lock, SX_LOCKED);

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (state == -1 || disk->d_state == state)
			n++;
	}
	return (n);
}

/*
 * Find a disk in the mirror by its disk ID.
 */
static struct g_mirror_disk *
g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
{
	struct g_mirror_disk *disk;

	sx_assert(&sc->sc_lock, SX_XLOCKED);

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_id == id)
			return (disk);
	}
	return (NULL);
}

static u_int
g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
{
	struct bio *bp;
	u_int nreqs = 0;

	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
		if (bp->bio_from == cp)
			nreqs++;
	}
	mtx_unlock(&sc->sc_queue_mtx);
	return (nreqs);
}

static int
g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	if (cp->index > 0) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s exist, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	if (g_mirror_nrequests(sc, cp) > 0) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s in queue, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	return (0);
}

static void
g_mirror_destroy_consumer(void *arg, int flags __unused)
{
	struct g_consumer *cp;

	g_topology_assert();

	cp = arg;
	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}

static void
g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
{
	struct g_provider *pp;
	int retaste_wait;

	g_topology_assert();

	cp->private = NULL;
	if (g_mirror_is_busy(sc, cp))
		return;
	pp = cp->provider;
	retaste_wait = 0;
	if (cp->acw == 1) {
		if ((pp->geom->flags & G_GEOM_WITHER) == 0)
			retaste_wait = 1;
	}
	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
	    -cp->acw, -cp->ace, 0);
	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
	if (retaste_wait) {
		/*
		 * After the retaste event is sent (inside g_access()), we can
		 * post an event to detach and destroy the consumer.
		 * A class which already has a consumer attached to the given
		 * provider will not receive a retaste event for it.
		 * This is how we ignore retaste events for consumers that were
		 * opened for writing: the consumer is detached and destroyed
		 * only after the retaste event has been sent.
		 */
		g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL);
		return;
	}
	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}

static int
g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
{
	struct g_consumer *cp;
	int error;

	g_topology_assert_not();
	KASSERT(disk->d_consumer == NULL,
	    ("Disk already connected (device %s).", disk->d_softc->sc_name));

	g_topology_lock();
	cp = g_new_consumer(disk->d_softc->sc_geom);
	cp->flags |= G_CF_DIRECT_RECEIVE;
	error = g_attach(cp, pp);
	if (error != 0) {
		g_destroy_consumer(cp);
		g_topology_unlock();
		return (error);
	}
	error = g_access(cp, 1, 1, 1);
	if (error != 0) {
		g_detach(cp);
		g_destroy_consumer(cp);
		g_topology_unlock();
		G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).",
		    pp->name, error);
		return (error);
	}
	g_topology_unlock();
	disk->d_consumer = cp;
	disk->d_consumer->private = disk;
	disk->d_consumer->index = 0;

	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
	return (0);
}

static void
g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	g_topology_assert();

	if (cp == NULL)
		return;
	if (cp->provider != NULL)
		g_mirror_kill_consumer(sc, cp);
	else
		g_destroy_consumer(cp);
}

/*
 * Initialize a disk. This means allocate memory, create a consumer, attach it
 * to the provider and open access (r1w1e1) to it.
 */
static struct g_mirror_disk *
g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
    struct g_mirror_metadata *md, int *errorp)
{
	struct g_mirror_disk *disk;
	int i, error;

	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
	if (disk == NULL) {
		error = ENOMEM;
		goto fail;
	}
	disk->d_softc = sc;
	error = g_mirror_connect_disk(disk, pp);
	if (error != 0)
		goto fail;
	disk->d_id = md->md_did;
	disk->d_state = G_MIRROR_DISK_STATE_NONE;
	disk->d_priority = md->md_priority;
	disk->d_flags = md->md_dflags;
	error = g_getattr("GEOM::candelete", disk->d_consumer, &i);
	if (error == 0 && i != 0)
		disk->d_flags |= G_MIRROR_DISK_FLAG_CANDELETE;
	if (md->md_provider[0] != '\0')
		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
	disk->d_sync.ds_consumer = NULL;
	disk->d_sync.ds_offset = md->md_sync_offset;
	disk->d_sync.ds_offset_done = md->md_sync_offset;
	disk->d_sync.ds_update_ts = time_uptime;
	disk->d_genid = md->md_genid;
	disk->d_sync.ds_syncid = md->md_syncid;
	if (errorp != NULL)
		*errorp = 0;
	return (disk);
fail:
	if (errorp != NULL)
		*errorp = error;
	if (disk != NULL)
		free(disk, M_MIRROR);
	return (NULL);
}

static void
g_mirror_destroy_disk(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	LIST_REMOVE(disk, d_next);
	g_mirror_event_cancel(disk);
	if (sc->sc_hint == disk)
		sc->sc_hint = NULL;
	switch (disk->d_state) {
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		g_mirror_sync_stop(disk, 1);
		/* FALLTHROUGH */
	case G_MIRROR_DISK_STATE_NEW:
	case G_MIRROR_DISK_STATE_STALE:
	case G_MIRROR_DISK_STATE_ACTIVE:
		g_topology_lock();
		g_mirror_disconnect_consumer(sc, disk->d_consumer);
		g_topology_unlock();
		free(disk, M_MIRROR);
		break;
	default:
		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
	}
}

static void
g_mirror_free_device(struct g_mirror_softc *sc)
{

	mtx_destroy(&sc->sc_queue_mtx);
	mtx_destroy(&sc->sc_events_mtx);
	mtx_destroy(&sc->sc_done_mtx);
	sx_destroy(&sc->sc_lock);
	free(sc, M_MIRROR);
}

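/*
 * The softc reference count is shared between the worker thread and the
 * mirror provider; whichever of g_mirror_providergone() and
 * g_mirror_destroy_device() drops the last reference frees the softc.
 */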
static void
g_mirror_providergone(struct g_provider *pp)
{
	struct g_mirror_softc *sc = pp->private;

	if ((--sc->sc_refcnt) == 0)
		g_mirror_free_device(sc);
}

static void
g_mirror_destroy_device(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	struct g_geom *gp;
	struct g_consumer *cp, *tmpcp;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	gp = sc->sc_geom;
	if (sc->sc_provider != NULL)
		g_mirror_destroy_provider(sc);
	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
	    disk = LIST_FIRST(&sc->sc_disks)) {
		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
		g_mirror_destroy_disk(disk);
	}
	while ((ep = g_mirror_event_first(sc)) != NULL) {
		g_mirror_event_remove(sc, ep);
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			ep->e_flags |= G_MIRROR_EVENT_DONE;
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
			mtx_lock(&sc->sc_events_mtx);
			wakeup(ep);
			mtx_unlock(&sc->sc_events_mtx);
		}
	}
	callout_drain(&sc->sc_callout);

	g_topology_lock();
	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
		g_mirror_disconnect_consumer(sc, cp);
	}
	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
	g_wither_geom(gp, ENXIO);
	sx_xunlock(&sc->sc_lock);
	if ((--sc->sc_refcnt) == 0)
		g_mirror_free_device(sc);
	g_topology_unlock();
}

static void
g_mirror_orphan(struct g_consumer *cp)
{
	struct g_mirror_disk *disk;

	g_topology_assert();

	disk = cp->private;
	if (disk == NULL)
		return;
	disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
	    G_MIRROR_EVENT_DONTWAIT);
}

/*
 * Return the next active disk on the list.
 * It is possible that it will be the same disk as the given one.
 * If there are no active disks on the list, NULL is returned.
 */
static __inline struct g_mirror_disk *
g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
{
	struct g_mirror_disk *dp;

	for (dp = LIST_NEXT(disk, d_next); dp != disk;
	    dp = LIST_NEXT(dp, d_next)) {
		if (dp == NULL)
			dp = LIST_FIRST(&sc->sc_disks);
		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
			break;
	}
	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
		return (NULL);
	return (dp);
}

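/*
 * Round-robin disk selector: return sc_hint if it is ACTIVE (otherwise the
 * next active disk), and advance the hint to the next active disk.
 */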
static struct g_mirror_disk *
g_mirror_get_disk(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	if (sc->sc_hint == NULL) {
		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
		if (sc->sc_hint == NULL)
			return (NULL);
	}
	disk = sc->sc_hint;
	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
		disk = g_mirror_find_next(sc, disk);
		if (disk == NULL)
			return (NULL);
	}
	sc->sc_hint = g_mirror_find_next(sc, disk);
	return (disk);
}

static int
g_mirror_write_metadata(struct g_mirror_disk *disk,
    struct g_mirror_metadata *md)
{
	struct g_mirror_softc *sc;
	struct g_consumer *cp;
	off_t offset, length;
	u_char *sector;
	int error = 0;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	cp = disk->d_consumer;
	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	length = cp->provider->sectorsize;
	offset = cp->provider->mediasize - length;
	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
	if (md != NULL &&
	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0) {
		/*
		 * Handle the case where the size of the parent provider has
		 * been reduced.
		 */
		if (offset < md->md_mediasize)
			error = ENOSPC;
		else
			mirror_metadata_encode(md, sector);
	}
	KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_metadata_write, error);
	if (error == 0)
		error = g_write_data(cp, offset, sector, length);
	free(sector, M_MIRROR);
	if (error != 0) {
		if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
			disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
			G_MIRROR_DEBUG(0, "Cannot write metadata on %s "
			    "(device=%s, error=%d).",
			    g_mirror_get_diskname(disk), sc->sc_name, error);
		} else {
			G_MIRROR_DEBUG(1, "Cannot write metadata on %s "
			    "(device=%s, error=%d).",
			    g_mirror_get_diskname(disk), sc->sc_name, error);
		}
		if (g_mirror_disconnect_on_failure &&
		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
		}
	}
	return (error);
}

static int
g_mirror_clear_metadata(struct g_mirror_disk *disk)
{
	int error;

	g_topology_assert_not();
	sx_assert(&disk->d_softc->sc_lock, SX_LOCKED);

	if (disk->d_softc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
		return (0);
	error = g_mirror_write_metadata(disk, NULL);
	if (error == 0) {
		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
		    g_mirror_get_diskname(disk));
	} else {
		G_MIRROR_DEBUG(0,
		    "Cannot clear metadata on disk %s (error=%d).",
		    g_mirror_get_diskname(disk), error);
	}
	return (error);
}

void
g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
    struct g_mirror_metadata *md)
{

	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
	md->md_version = G_MIRROR_VERSION;
	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
	md->md_mid = sc->sc_id;
	md->md_all = sc->sc_ndisks;
	md->md_slice = sc->sc_slice;
	md->md_balance = sc->sc_balance;
	md->md_genid = sc->sc_genid;
	md->md_mediasize = sc->sc_mediasize;
	md->md_sectorsize = sc->sc_sectorsize;
	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
	bzero(md->md_provider, sizeof(md->md_provider));
	if (disk == NULL) {
		md->md_did = arc4random();
		md->md_priority = 0;
		md->md_syncid = 0;
		md->md_dflags = 0;
		md->md_sync_offset = 0;
		md->md_provsize = 0;
	} else {
		md->md_did = disk->d_id;
		md->md_priority = disk->d_priority;
		md->md_syncid = disk->d_sync.ds_syncid;
		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
			md->md_sync_offset = disk->d_sync.ds_offset_done;
		else
			md->md_sync_offset = 0;
		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
			strlcpy(md->md_provider,
			    disk->d_consumer->provider->name,
			    sizeof(md->md_provider));
		}
		md->md_provsize = disk->d_consumer->provider->mediasize;
	}
}

void
g_mirror_update_metadata(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_mirror_metadata md;
	int error;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
		return;
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0)
		g_mirror_fill_metadata(sc, disk, &md);
	error = g_mirror_write_metadata(disk, &md);
	if (error == 0) {
		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
		    g_mirror_get_diskname(disk));
	} else {
		G_MIRROR_DEBUG(0,
		    "Cannot update metadata on disk %s (error=%d).",
		    g_mirror_get_diskname(disk), error);
	}
}

static void
g_mirror_bump_syncid(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
	    ("%s called with no active disks (device=%s).", __func__,
	    sc->sc_name));

	sc->sc_syncid++;
	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
	    sc->sc_syncid);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_sync.ds_syncid = sc->sc_syncid;
			g_mirror_update_metadata(disk);
		}
	}
}

static void
g_mirror_bump_genid(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
	    ("%s called with no active disks (device=%s).", __func__,
	    sc->sc_name));

	sc->sc_genid++;
	G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name,
	    sc->sc_genid);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_genid = sc->sc_genid;
			g_mirror_update_metadata(disk);
		}
	}
}

static int
g_mirror_idle(struct g_mirror_softc *sc, int acw)
{
	struct g_mirror_disk *disk;
	int timeout;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if (sc->sc_provider == NULL)
		return (0);
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
		return (0);
	if (sc->sc_idle)
		return (0);
	if (sc->sc_writes > 0)
		return (0);
	if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) {
		timeout = g_mirror_idletime - (time_uptime - sc->sc_last_write);
		if (!g_mirror_shutdown && timeout > 0)
			return (timeout);
	}
	sc->sc_idle = 1;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as clean.",
		    g_mirror_get_diskname(disk), sc->sc_name);
		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
	}
	return (0);
}

static void
g_mirror_unidle(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
		return;
	sc->sc_idle = 0;
	sc->sc_last_write = time_uptime;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as dirty.",
		    g_mirror_get_diskname(disk), sc->sc_name);
		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
	}
}

static void
g_mirror_flush_done(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct bio *pbp;

	pbp = bp->bio_parent;
	sc = pbp->bio_to->private;
	mtx_lock(&sc->sc_done_mtx);
	if (pbp->bio_error == 0)
		pbp->bio_error = bp->bio_error;
	pbp->bio_completed += bp->bio_completed;
	pbp->bio_inbed++;
	if (pbp->bio_children == pbp->bio_inbed) {
		mtx_unlock(&sc->sc_done_mtx);
		g_io_deliver(pbp, pbp->bio_error);
	} else
		mtx_unlock(&sc->sc_done_mtx);
	g_destroy_bio(bp);
}

static void
g_mirror_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_from->geom->softc;
	bp->bio_cflags = G_MIRROR_BIO_FLAG_REGULAR;
	mtx_lock(&sc->sc_queue_mtx);
	bioq_insert_tail(&sc->sc_queue, bp);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);
}

static void
g_mirror_regular_request(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct bio *pbp;

	g_topology_assert_not();

	pbp = bp->bio_parent;
	sc = pbp->bio_to->private;
	bp->bio_from->index--;
	if (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_DELETE)
		sc->sc_writes--;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
	}

	if (bp->bio_cmd == BIO_READ)
		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_read,
		    bp->bio_error);
	else if (bp->bio_cmd == BIO_WRITE)
		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_write,
		    bp->bio_error);

	pbp->bio_inbed++;
	KASSERT(pbp->bio_inbed <= pbp->bio_children,
	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
	    pbp->bio_children));
	if (bp->bio_error == 0 && pbp->bio_error == 0) {
		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
		g_destroy_bio(bp);
		if (pbp->bio_children == pbp->bio_inbed) {
			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
			pbp->bio_completed = pbp->bio_length;
			if (pbp->bio_cmd == BIO_WRITE ||
			    pbp->bio_cmd == BIO_DELETE) {
				bioq_remove(&sc->sc_inflight, pbp);
				/* Release delayed sync requests if possible. */
				g_mirror_sync_release(sc);
			}
			g_io_deliver(pbp, pbp->bio_error);
		}
		return;
	} else if (bp->bio_error != 0) {
		if (pbp->bio_error == 0)
			pbp->bio_error = bp->bio_error;
		if (disk != NULL) {
			if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
				disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
				G_MIRROR_LOGREQ(0, bp,
				    "Request failed (error=%d).",
				    bp->bio_error);
			} else {
				G_MIRROR_LOGREQ(1, bp,
				    "Request failed (error=%d).",
				    bp->bio_error);
			}
			if (g_mirror_disconnect_on_failure &&
			    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1)
			{
				if (bp->bio_error == ENXIO &&
				    bp->bio_cmd == BIO_READ)
					sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
				else if (bp->bio_error == ENXIO)
					sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID_NOW;
				else
					sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
				g_mirror_event_send(disk,
				    G_MIRROR_DISK_STATE_DISCONNECTED,
				    G_MIRROR_EVENT_DONTWAIT);
			}
		}
		switch (pbp->bio_cmd) {
		case BIO_DELETE:
		case BIO_WRITE:
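			/*
			 * Discount the failed request: the parent then
			 * completes once the remaining components answer,
			 * and it succeeds if any component succeeded (see
			 * the BIO_WRITE handling below).
			 */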
			pbp->bio_inbed--;
			pbp->bio_children--;
			break;
		}
	}
	g_destroy_bio(bp);

	switch (pbp->bio_cmd) {
	case BIO_READ:
		if (pbp->bio_inbed < pbp->bio_children)
			break;
		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 1)
			g_io_deliver(pbp, pbp->bio_error);
		else {
			pbp->bio_error = 0;
			mtx_lock(&sc->sc_queue_mtx);
			bioq_insert_tail(&sc->sc_queue, pbp);
			mtx_unlock(&sc->sc_queue_mtx);
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
			wakeup(sc);
		}
		break;
	case BIO_DELETE:
	case BIO_WRITE:
		if (pbp->bio_children == 0) {
			/*
			 * All requests failed.
			 */
		} else if (pbp->bio_inbed < pbp->bio_children) {
			/* Do nothing. */
			break;
		} else if (pbp->bio_children == pbp->bio_inbed) {
			/* Some requests succeeded. */
			pbp->bio_error = 0;
			pbp->bio_completed = pbp->bio_length;
		}
		bioq_remove(&sc->sc_inflight, pbp);
		/* Release delayed sync requests if possible. */
		g_mirror_sync_release(sc);
		g_io_deliver(pbp, pbp->bio_error);
		break;
	default:
		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
		break;
	}
}

static void
g_mirror_sync_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
	sc = bp->bio_from->geom->softc;
	bp->bio_cflags = G_MIRROR_BIO_FLAG_SYNC;
	mtx_lock(&sc->sc_queue_mtx);
	bioq_insert_tail(&sc->sc_queue, bp);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);
}

static void
g_mirror_candelete(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	int *val;

	sc = bp->bio_to->private;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE)
			break;
	}
	val = (int *)bp->bio_data;
	*val = (disk != NULL);
	g_io_deliver(bp, 0);
}

static void
g_mirror_kernel_dump(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct bio *cbp;
	struct g_kerneldump *gkd;

	/*
	 * We configure dumping to the first component, because this component
	 * will be used for reading with the 'prefer' balance algorithm.
	 * If the component with the highest priority is currently disconnected,
	 * we will not be able to read the dump after the reboot, even if that
	 * component is connected and synchronized later.  Can we do something
	 * better?
	 */
	sc = bp->bio_to->private;
	disk = LIST_FIRST(&sc->sc_disks);

	gkd = (struct g_kerneldump *)bp->bio_data;
	if (gkd->length > bp->bio_to->mediasize)
		gkd->length = bp->bio_to->mediasize;
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		g_io_deliver(bp, ENOMEM);
		return;
	}
	cbp->bio_done = g_std_done;
	g_io_request(cbp, disk->d_consumer);
	G_MIRROR_DEBUG(1, "Kernel dump will go to %s.",
	    g_mirror_get_diskname(disk));
}

static void
g_mirror_flush(struct g_mirror_softc *sc, struct bio *bp)
{
	struct bio_queue_head queue;
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	bioq_init(&queue);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		cbp = g_clone_bio(bp);
		if (cbp == NULL) {
			while ((cbp = bioq_takefirst(&queue)) != NULL)
				g_destroy_bio(cbp);
			if (bp->bio_error == 0)
				bp->bio_error = ENOMEM;
			g_io_deliver(bp, bp->bio_error);
			return;
		}
		bioq_insert_tail(&queue, cbp);
		cbp->bio_done = g_mirror_flush_done;
		cbp->bio_caller1 = disk;
		cbp->bio_to = disk->d_consumer->provider;
	}
	while ((cbp = bioq_takefirst(&queue)) != NULL) {
		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
		disk = cbp->bio_caller1;
		cbp->bio_caller1 = NULL;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		g_io_request(cbp, disk->d_consumer);
	}
}

static void
g_mirror_start(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_to->private;
	/*
	 * If sc == NULL or there are no valid disks, the provider's error
	 * should be set and g_mirror_start() should not be called at all.
	 */
	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
	    ("Provider's error should be set (error=%d)(mirror=%s).",
	    bp->bio_to->error, bp->bio_to->name));
	G_MIRROR_LOGREQ(3, bp, "Request received.");

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		break;
	case BIO_FLUSH:
		g_mirror_flush(sc, bp);
		return;
	case BIO_GETATTR:
		if (!strcmp(bp->bio_attribute, "GEOM::candelete")) {
			g_mirror_candelete(bp);
			return;
		} else if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) {
			g_mirror_kernel_dump(bp);
			return;
		}
		/* FALLTHROUGH */
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		return;
	}
	mtx_lock(&sc->sc_queue_mtx);
	if (bp->bio_to->error != 0) {
		mtx_unlock(&sc->sc_queue_mtx);
		g_io_deliver(bp, bp->bio_to->error);
		return;
	}
	bioq_insert_tail(&sc->sc_queue, bp);
	mtx_unlock(&sc->sc_queue_mtx);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	wakeup(sc);
}

/*
 * Return TRUE if the given request collides with an in-progress
 * synchronization request.
 */
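/*
 * Note: [rstart, rend) and [sstart, send) are half-open byte ranges, so two
 * ranges collide exactly when rend > sstart && rstart < send.  For example
 * (illustrative), a request of length 128k at offset 0 collides with a sync
 * request at offset 64k, but not with one starting at offset 128k.
 */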
static bool
g_mirror_sync_collision(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct bio *sbp;
	off_t rstart, rend, sstart, send;
	u_int i;

	if (sc->sc_sync.ds_ndisks == 0)
		return (false);
	rstart = bp->bio_offset;
	rend = bp->bio_offset + bp->bio_length;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING)
			continue;
		for (i = 0; i < g_mirror_syncreqs; i++) {
			sbp = disk->d_sync.ds_bios[i];
			if (sbp == NULL)
				continue;
			sstart = sbp->bio_offset;
			send = sbp->bio_offset + sbp->bio_length;
			if (rend > sstart && rstart < send)
				return (true);
		}
	}
	return (false);
}

/*
 * Return TRUE if the given sync request collides with an in-progress regular
 * request.
 */
static bool
g_mirror_regular_collision(struct g_mirror_softc *sc, struct bio *sbp)
{
	off_t rstart, rend, sstart, send;
	struct bio *bp;

	if (sc->sc_sync.ds_ndisks == 0)
		return (false);
	sstart = sbp->bio_offset;
	send = sbp->bio_offset + sbp->bio_length;
	TAILQ_FOREACH(bp, &sc->sc_inflight.queue, bio_queue) {
		rstart = bp->bio_offset;
		rend = bp->bio_offset + bp->bio_length;
		if (rend > sstart && rstart < send)
			return (true);
	}
	return (false);
}

/*
 * Put a request onto the delayed queue.
 */
static void
g_mirror_regular_delay(struct g_mirror_softc *sc, struct bio *bp)
{

	G_MIRROR_LOGREQ(2, bp, "Delaying request.");
	bioq_insert_head(&sc->sc_regular_delayed, bp);
}

/*
 * Put a synchronization request onto the delayed queue.
 */
static void
g_mirror_sync_delay(struct g_mirror_softc *sc, struct bio *bp)
{

	G_MIRROR_LOGREQ(2, bp, "Delaying synchronization request.");
	bioq_insert_tail(&sc->sc_sync_delayed, bp);
}

/*
 * Release delayed regular requests which no longer collide with
 * synchronization requests.
 */
static void
g_mirror_regular_release(struct g_mirror_softc *sc)
{
	struct bio *bp, *bp2;

	TAILQ_FOREACH_SAFE(bp, &sc->sc_regular_delayed.queue, bio_queue, bp2) {
		if (g_mirror_sync_collision(sc, bp))
			continue;
		bioq_remove(&sc->sc_regular_delayed, bp);
		G_MIRROR_LOGREQ(2, bp, "Releasing delayed request (%p).", bp);
		mtx_lock(&sc->sc_queue_mtx);
		bioq_insert_head(&sc->sc_queue, bp);
		mtx_unlock(&sc->sc_queue_mtx);
	}
}

/*
 * Release delayed synchronization requests which no longer collide with
 * regular requests.
 */
static void
g_mirror_sync_release(struct g_mirror_softc *sc)
{
	struct bio *bp, *bp2;

	TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed.queue, bio_queue, bp2) {
		if (g_mirror_regular_collision(sc, bp))
			continue;
		bioq_remove(&sc->sc_sync_delayed, bp);
		G_MIRROR_LOGREQ(2, bp,
		    "Releasing delayed synchronization request.");
		g_io_request(bp, bp->bio_from);
	}
}

/*
 * Free a synchronization request and clear its slot in the array.
 */
static void
g_mirror_sync_request_free(struct g_mirror_disk *disk, struct bio *bp)
{
	int idx;

	if (disk != NULL && disk->d_sync.ds_bios != NULL) {
		idx = (int)(uintptr_t)bp->bio_caller1;
		KASSERT(disk->d_sync.ds_bios[idx] == bp,
		    ("unexpected sync BIO at %p:%d", disk, idx));
		disk->d_sync.ds_bios[idx] = NULL;
	}
	free(bp->bio_data, M_MIRROR);
	g_destroy_bio(bp);
}

/*
 * Handle synchronization requests.
 * Every synchronization request is a two-step process: first, a READ request
 * is sent to the active provider and then a WRITE request (with the data that
 * was read) is sent to the provider being synchronized.  When the WRITE is
 * finished, a new synchronization request is sent.
 */
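/*
 * An illustrative sketch of the flow:
 *
 *	BIO_READ completes -> g_mirror_sync_done() requeues it -> handled
 *	here: converted to a BIO_WRITE and sent to the synchronizing disk ->
 *	BIO_WRITE completes -> requeued again -> handled here: a fresh
 *	BIO_READ is issued at sync->ds_offset.
 */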
static void
g_mirror_sync_request(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct g_mirror_disk_sync *sync;

	bp->bio_from->index--;
	sc = bp->bio_from->geom->softc;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
		g_mirror_sync_request_free(NULL, bp);
		sx_xlock(&sc->sc_lock);
		return;
	}

	/*
	 * Synchronization request.
	 */
	switch (bp->bio_cmd) {
	case BIO_READ:
	    {
		struct g_consumer *cp;

		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_read,
		    bp->bio_error);

		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_mirror_sync_request_free(disk, bp);
			return;
		}
		G_MIRROR_LOGREQ(3, bp,
		    "Synchronization request half-finished.");
		bp->bio_cmd = BIO_WRITE;
		bp->bio_cflags = 0;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		cp->index++;
		g_io_request(bp, cp);
		return;
	    }
	case BIO_WRITE:
	    {
		off_t offset;
		void *data;
		int i, idx;

		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_write,
		    bp->bio_error);

		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_mirror_sync_request_free(disk, bp);
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}
		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
		sync = &disk->d_sync;
		if (sync->ds_offset >= sc->sc_mediasize ||
		    sync->ds_consumer == NULL ||
		    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
			/* Don't send more synchronization requests. */
			sync->ds_inflight--;
			g_mirror_sync_request_free(disk, bp);
			if (sync->ds_inflight > 0)
				return;
			if (sync->ds_consumer == NULL ||
			    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
				return;
			}
			/* Disk up-to-date, activate it. */
			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}

		/* Send next synchronization request. */
		data = bp->bio_data;
		idx = (int)(uintptr_t)bp->bio_caller1;
		g_reset_bio(bp);
		bp->bio_cmd = BIO_READ;
		bp->bio_offset = sync->ds_offset;
		bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
		sync->ds_offset += bp->bio_length;
		bp->bio_done = g_mirror_sync_done;
		bp->bio_data = data;
		bp->bio_from = sync->ds_consumer;
		bp->bio_to = sc->sc_provider;
		bp->bio_caller1 = (void *)(uintptr_t)idx;
		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
		sync->ds_consumer->index++;
		/*
		 * Delay the request if it collides with a regular request.
		 */
		if (g_mirror_regular_collision(sc, bp))
			g_mirror_sync_delay(sc, bp);
		else
			g_io_request(bp, sync->ds_consumer);

		/* Release delayed requests if possible. */
		g_mirror_regular_release(sc);

		/* Find the smallest offset. */
		offset = sc->sc_mediasize;
		for (i = 0; i < g_mirror_syncreqs; i++) {
			bp = sync->ds_bios[i];
			if (bp != NULL && bp->bio_offset < offset)
				offset = bp->bio_offset;
		}
		if (g_mirror_sync_period > 0 &&
		    time_uptime - sync->ds_update_ts > g_mirror_sync_period) {
			sync->ds_offset_done = offset;
			g_mirror_update_metadata(disk);
			sync->ds_update_ts = time_uptime;
		}
		return;
	    }
	default:
		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
		    bp->bio_cmd, sc->sc_name));
		break;
	}
}

static void
g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
			break;
	}
	if (disk == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENXIO;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	g_io_request(cbp, cp);
}

static void
g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	disk = g_mirror_get_disk(sc);
	if (disk == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENXIO;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	g_io_request(cbp, cp);
}

#define TRACK_SIZE  (1 * 1024 * 1024)
#define LOAD_SCALE	256
#define ABS(x)		(((x) >= 0) ? (x) : (-(x)))
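
/*
 * Illustrative notes on the 'load' balancer below: each disk's load is an
 * exponentially weighted moving average of its number of outstanding
 * requests, load' = (index * LOAD_SCALE + 7 * load) / 8.  A disk whose head
 * is exactly at bio_offset gets a 2 * LOAD_SCALE bonus (1 * LOAD_SCALE if it
 * is within TRACK_SIZE), so locality can outweigh up to two queued requests.
 */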

static void
g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk, *dp;
	struct g_consumer *cp;
	struct bio *cbp;
	int prio, best;

	/* Find a disk with the smallest load. */
	disk = NULL;
	best = INT_MAX;
	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		prio = dp->load;
		/* If disk head is precisely in position - highly prefer it. */
		if (dp->d_last_offset == bp->bio_offset)
			prio -= 2 * LOAD_SCALE;
		else
		/* If disk head is close to position - prefer it. */
		if (ABS(dp->d_last_offset - bp->bio_offset) < TRACK_SIZE)
			prio -= 1 * LOAD_SCALE;
		if (prio <= best) {
			disk = dp;
			best = prio;
		}
	}
	KASSERT(disk != NULL, ("NULL disk for %s.", sc->sc_name));
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	/* Remember last head position. */
	disk->d_last_offset = bp->bio_offset + bp->bio_length;
	/* Update loads. */
	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
		dp->load = (dp->d_consumer->index * LOAD_SCALE +
		    dp->load * 7) / 8;
	}
	g_io_request(cbp, cp);
}

static void
g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
{
	struct bio_queue_head queue;
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;
	off_t left, mod, offset, slice;
	u_char *data;
	u_int ndisks;

	if (bp->bio_length <= sc->sc_slice) {
		g_mirror_request_round_robin(sc, bp);
		return;
	}
	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
	slice = bp->bio_length / ndisks;
	mod = slice % sc->sc_provider->sectorsize;
	if (mod != 0)
		slice += sc->sc_provider->sectorsize - mod;
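	/*
	 * Worked example (illustrative): a 1 MiB read over 3 active disks
	 * with 512-byte sectors gives slice = 349525, rounded up to 349696;
	 * the first two components then get 349696 bytes each and the last
	 * one the remaining 349184 (via MIN(left, slice) below).
	 */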
	/*
	 * Allocate all bios before sending any request, so we can return
	 * ENOMEM in a nice and clean way.
	 */
	left = bp->bio_length;
	offset = bp->bio_offset;
	data = bp->bio_data;
	bioq_init(&queue);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		cbp = g_clone_bio(bp);
		if (cbp == NULL) {
			while ((cbp = bioq_takefirst(&queue)) != NULL)
				g_destroy_bio(cbp);
			if (bp->bio_error == 0)
				bp->bio_error = ENOMEM;
			g_io_deliver(bp, bp->bio_error);
			return;
		}
		bioq_insert_tail(&queue, cbp);
		cbp->bio_done = g_mirror_done;
		cbp->bio_caller1 = disk;
		cbp->bio_to = disk->d_consumer->provider;
		cbp->bio_offset = offset;
		cbp->bio_data = data;
		cbp->bio_length = MIN(left, slice);
		left -= cbp->bio_length;
		if (left == 0)
			break;
		offset += cbp->bio_length;
		data += cbp->bio_length;
	}
	while ((cbp = bioq_takefirst(&queue)) != NULL) {
		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
		disk = cbp->bio_caller1;
		cbp->bio_caller1 = NULL;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		disk->d_consumer->index++;
		g_io_request(cbp, disk->d_consumer);
	}
}

static void
g_mirror_register_request(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_to->private;
	switch (bp->bio_cmd) {
	case BIO_READ:
		switch (sc->sc_balance) {
		case G_MIRROR_BALANCE_LOAD:
			g_mirror_request_load(sc, bp);
			break;
		case G_MIRROR_BALANCE_PREFER:
			g_mirror_request_prefer(sc, bp);
			break;
		case G_MIRROR_BALANCE_ROUND_ROBIN:
			g_mirror_request_round_robin(sc, bp);
			break;
		case G_MIRROR_BALANCE_SPLIT:
			g_mirror_request_split(sc, bp);
			break;
		}
		return;
	case BIO_WRITE:
	case BIO_DELETE:
	    {
		struct g_mirror_disk *disk;
		struct g_mirror_disk_sync *sync;
		struct bio_queue_head queue;
		struct g_consumer *cp;
		struct bio *cbp;

		/*
		 * Delay the request if it collides with a synchronization
		 * request.
		 */
		if (g_mirror_sync_collision(sc, bp)) {
			g_mirror_regular_delay(sc, bp);
			return;
		}

		if (sc->sc_idle)
			g_mirror_unidle(sc);
		else
			sc->sc_last_write = time_uptime;

		/*
		 * Bump syncid on first write.
		 */
		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) {
			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
			g_mirror_bump_syncid(sc);
		}

		/*
		 * Allocate all bios before sending any request, so we can
		 * return ENOMEM in a nice and clean way.
		 */
		bioq_init(&queue);
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			sync = &disk->d_sync;
			switch (disk->d_state) {
			case G_MIRROR_DISK_STATE_ACTIVE:
				break;
			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
				if (bp->bio_offset >= sync->ds_offset)
					continue;
				break;
			default:
				continue;
			}
			if (bp->bio_cmd == BIO_DELETE &&
			    (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE) == 0)
				continue;
			cbp = g_clone_bio(bp);
			if (cbp == NULL) {
				while ((cbp = bioq_takefirst(&queue)) != NULL)
					g_destroy_bio(cbp);
				if (bp->bio_error == 0)
					bp->bio_error = ENOMEM;
				g_io_deliver(bp, bp->bio_error);
				return;
			}
			bioq_insert_tail(&queue, cbp);
			cbp->bio_done = g_mirror_done;
			cp = disk->d_consumer;
			cbp->bio_caller1 = cp;
			cbp->bio_to = cp->provider;
			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
			    ("Consumer %s not opened (r%dw%de%d).",
			    cp->provider->name, cp->acr, cp->acw, cp->ace));
		}
		if (bioq_first(&queue) == NULL) {
			g_io_deliver(bp, EOPNOTSUPP);
			return;
		}
		while ((cbp = bioq_takefirst(&queue)) != NULL) {
			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
			cp = cbp->bio_caller1;
			cbp->bio_caller1 = NULL;
			cp->index++;
			sc->sc_writes++;
			g_io_request(cbp, cp);
		}
		/*
		 * Put the request onto the inflight queue, so we can check
		 * that new synchronization requests don't collide with it.
		 */
		bioq_insert_tail(&sc->sc_inflight, bp);
		return;
	    }
	default:
		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
		    bp->bio_cmd, sc->sc_name));
		break;
	}
}

static int
g_mirror_can_destroy(struct g_mirror_softc *sc)
{
	struct g_geom *gp;
	struct g_consumer *cp;

	g_topology_assert();
	gp = sc->sc_geom;
	if (gp->softc == NULL)
		return (1);
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_TASTING) != 0)
		return (0);
	LIST_FOREACH(cp, &gp->consumer, consumer) {
		if (g_mirror_is_busy(sc, cp))
			return (0);
	}
	gp = sc->sc_sync.ds_geom;
	LIST_FOREACH(cp, &gp->consumer, consumer) {
		if (g_mirror_is_busy(sc, cp))
			return (0);
	}
	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
	    sc->sc_name);
	return (1);
}

static int
g_mirror_try_destroy(struct g_mirror_softc *sc)
{

	if (sc->sc_rootmount != NULL) {
		G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
		    sc->sc_rootmount);
		root_mount_rel(sc->sc_rootmount);
		sc->sc_rootmount = NULL;
	}
	g_topology_lock();
	if (!g_mirror_can_destroy(sc)) {
		g_topology_unlock();
		return (0);
	}
	sc->sc_geom->softc = NULL;
	sc->sc_sync.ds_geom->softc = NULL;
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DRAIN) != 0) {
		g_topology_unlock();
		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
		    &sc->sc_worker);
		/* Unlock sc_lock here, as it can be destroyed after wakeup. */
		sx_xunlock(&sc->sc_lock);
		wakeup(&sc->sc_worker);
		sc->sc_worker = NULL;
	} else {
		g_topology_unlock();
		g_mirror_destroy_device(sc);
	}
	return (1);
}

/*
 * Worker thread.
 */
static void
g_mirror_worker(void *arg)
{
	struct g_mirror_softc *sc;
	struct g_mirror_event *ep;
	struct bio *bp;
	int timeout;

	sc = arg;
	thread_lock(curthread);
	sched_prio(curthread, PRIBIO);
	thread_unlock(curthread);

	sx_xlock(&sc->sc_lock);
	for (;;) {
		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
		/*
		 * First take a look at events.
		 * This is important to handle events before any I/O requests.
		 */
		ep = g_mirror_event_first(sc);
		if (ep != NULL) {
			g_mirror_event_remove(sc, ep);
			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
				/* Update only device status. */
				G_MIRROR_DEBUG(3,
				    "Running event for device %s.",
				    sc->sc_name);
				ep->e_error = 0;
				g_mirror_update_device(sc, true);
			} else {
				/* Update disk status. */
				G_MIRROR_DEBUG(3, "Running event for disk %s.",
				     g_mirror_get_diskname(ep->e_disk));
				ep->e_error = g_mirror_update_disk(ep->e_disk,
				    ep->e_state);
				if (ep->e_error == 0)
					g_mirror_update_device(sc, false);
			}
			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
				KASSERT(ep->e_error == 0,
				    ("Error cannot be handled."));
				g_mirror_event_free(ep);
			} else {
				ep->e_flags |= G_MIRROR_EVENT_DONE;
				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
				    ep);
				mtx_lock(&sc->sc_events_mtx);
				wakeup(ep);
				mtx_unlock(&sc->sc_events_mtx);
			}
			if ((sc->sc_flags &
			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
				if (g_mirror_try_destroy(sc)) {
					curthread->td_pflags &= ~TDP_GEOM;
					G_MIRROR_DEBUG(1, "Thread exiting.");
					kproc_exit(0);
				}
			}
			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
			continue;
		}
1920		/*
1921		 * Check if we can mark the array as CLEAN and, if we cannot,
1922		 * how many seconds we should wait before trying again.
1923		 */
1924		timeout = g_mirror_idle(sc, -1);
1925		/*
1926		 * Now I/O requests.
1927		 */
1928		/* Get first request from the queue. */
1929		mtx_lock(&sc->sc_queue_mtx);
1930		bp = bioq_takefirst(&sc->sc_queue);
1931		if (bp == NULL) {
1932			if ((sc->sc_flags &
1933			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1934				mtx_unlock(&sc->sc_queue_mtx);
1935				if (g_mirror_try_destroy(sc)) {
1936					curthread->td_pflags &= ~TDP_GEOM;
1937					G_MIRROR_DEBUG(1, "Thread exiting.");
1938					kproc_exit(0);
1939				}
1940				mtx_lock(&sc->sc_queue_mtx);
1941				if (bioq_first(&sc->sc_queue) != NULL) {
1942					mtx_unlock(&sc->sc_queue_mtx);
1943					continue;
1944				}
1945			}
1946			if (g_mirror_event_first(sc) != NULL)
1947				continue;
1948			sx_xunlock(&sc->sc_lock);
1949			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1",
1950			    timeout * hz);
1951			sx_xlock(&sc->sc_lock);
1952			G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
1953			continue;
1954		}
1955		mtx_unlock(&sc->sc_queue_mtx);
1956
1957		if (bp->bio_from->geom == sc->sc_sync.ds_geom &&
1958		    (bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
1959			g_mirror_sync_request(bp);	/* READ */
1960		} else if (bp->bio_to != sc->sc_provider) {
1961			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0)
1962				g_mirror_regular_request(bp);
1963			else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
1964				g_mirror_sync_request(bp);	/* WRITE */
1965			else {
1966				KASSERT(0,
1967				    ("Invalid request cflags=0x%hx to=%s.",
1968				    bp->bio_cflags, bp->bio_to->name));
1969			}
1970		} else {
1971			g_mirror_register_request(bp);
1972		}
1973		G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__);
1974	}
1975}
1976
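/*
 * Keep the component's DIRTY flag in sync with the device's idle state,
 * unless NOFAILSYNC suppresses dirty-marking altogether.
 */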
1977static void
1978g_mirror_update_idle(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
1979{
1980
1981	sx_assert(&sc->sc_lock, SX_LOCKED);
1982
1983	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
1984		return;
1985	if (!sc->sc_idle && (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
1986		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as dirty.",
1987		    g_mirror_get_diskname(disk), sc->sc_name);
1988		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1989	} else if (sc->sc_idle &&
1990	    (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
1991		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as clean.",
1992		    g_mirror_get_diskname(disk), sc->sc_name);
1993		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1994	}
1995}
1996
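/*
 * Start synchronization of a disk: attach a consumer of the sync geom
 * to our own provider, then allocate and fire off g_mirror_syncreqs
 * parallel read BIOs of up to MAXPHYS bytes each, delaying any request
 * that collides with a regular one.
 */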
1997static void
1998g_mirror_sync_start(struct g_mirror_disk *disk)
1999{
2000	struct g_mirror_softc *sc;
2001	struct g_consumer *cp;
2002	struct bio *bp;
2003	int error, i;
2004
2005	g_topology_assert_not();
2006	sc = disk->d_softc;
2007	sx_assert(&sc->sc_lock, SX_LOCKED);
2008
2009	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2010	    ("Disk %s is not marked for synchronization.",
2011	    g_mirror_get_diskname(disk)));
2012	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2013	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
2014	    sc->sc_state));
2015
2016	sx_xunlock(&sc->sc_lock);
2017	g_topology_lock();
2018	cp = g_new_consumer(sc->sc_sync.ds_geom);
2019	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
2020	error = g_attach(cp, sc->sc_provider);
2021	KASSERT(error == 0,
2022	    ("Cannot attach to %s (error=%d).", sc->sc_name, error));
2023	error = g_access(cp, 1, 0, 0);
2024	KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error));
2025	g_topology_unlock();
2026	sx_xlock(&sc->sc_lock);
2027
2028	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
2029	    g_mirror_get_diskname(disk));
2030	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) == 0)
2031		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2032	KASSERT(disk->d_sync.ds_consumer == NULL,
2033	    ("Sync consumer already exists (device=%s, disk=%s).",
2034	    sc->sc_name, g_mirror_get_diskname(disk)));
2035
2036	disk->d_sync.ds_consumer = cp;
2037	disk->d_sync.ds_consumer->private = disk;
2038	disk->d_sync.ds_consumer->index = 0;
2039
2040	/*
2041	 * Allocate memory for synchronization bios and initialize them.
2042	 */
2043	disk->d_sync.ds_bios = malloc(sizeof(struct bio *) * g_mirror_syncreqs,
2044	    M_MIRROR, M_WAITOK);
2045	for (i = 0; i < g_mirror_syncreqs; i++) {
2046		bp = g_alloc_bio();
2047		disk->d_sync.ds_bios[i] = bp;
2048		bp->bio_parent = NULL;
2049		bp->bio_cmd = BIO_READ;
2050		bp->bio_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK);
2051		bp->bio_cflags = 0;
2052		bp->bio_offset = disk->d_sync.ds_offset;
2053		bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
2054		disk->d_sync.ds_offset += bp->bio_length;
2055		bp->bio_done = g_mirror_sync_done;
2056		bp->bio_from = disk->d_sync.ds_consumer;
2057		bp->bio_to = sc->sc_provider;
2058		bp->bio_caller1 = (void *)(uintptr_t)i;
2059	}
2060
2061	/* Increase the number of disks in SYNCHRONIZING state. */
2062	sc->sc_sync.ds_ndisks++;
2063	/* Set the number of in-flight synchronization requests. */
2064	disk->d_sync.ds_inflight = g_mirror_syncreqs;
2065
2066	/*
2067	 * Fire off the first synchronization requests.
2068	 */
2069	for (i = 0; i < g_mirror_syncreqs; i++) {
2070		bp = disk->d_sync.ds_bios[i];
2071		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
2072		disk->d_sync.ds_consumer->index++;
2073		/*
2074		 * Delay the request if it is colliding with a regular request.
2075		 */
2076		if (g_mirror_regular_collision(sc, bp))
2077			g_mirror_sync_delay(sc, bp);
2078		else
2079			g_io_request(bp, disk->d_sync.ds_consumer);
2080	}
2081}
2082
2083/*
2084 * Stop the synchronization process.
2085 * type: 0 - synchronization finished
2086 *       1 - synchronization stopped
2087 */
2088static void
2089g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
2090{
2091	struct g_mirror_softc *sc;
2092	struct g_consumer *cp;
2093
2094	g_topology_assert_not();
2095	sc = disk->d_softc;
2096	sx_assert(&sc->sc_lock, SX_LOCKED);
2097
2098	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2099	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2100	    g_mirror_disk_state2str(disk->d_state)));
2101	if (disk->d_sync.ds_consumer == NULL)
2102		return;
2103
2104	if (type == 0) {
2105		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
2106		    sc->sc_name, g_mirror_get_diskname(disk));
2107	} else /* if (type == 1) */ {
2108		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
2109		    sc->sc_name, g_mirror_get_diskname(disk));
2110	}
2111	g_mirror_regular_release(sc);
2112	free(disk->d_sync.ds_bios, M_MIRROR);
2113	disk->d_sync.ds_bios = NULL;
2114	cp = disk->d_sync.ds_consumer;
2115	disk->d_sync.ds_consumer = NULL;
2116	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2117	sc->sc_sync.ds_ndisks--;
2118	sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
2119	g_topology_lock();
2120	g_mirror_kill_consumer(sc, cp);
2121	g_topology_unlock();
2122	sx_xlock(&sc->sc_lock);
2123}
2124
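/*
 * Create and announce the mirror/<name> provider.  Stripe size and
 * offset are inherited from the component with the largest stripe, and
 * unmapped BIOs are accepted only if every component supports them and
 * the balance algorithm is not SPLIT.
 */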
2125static void
2126g_mirror_launch_provider(struct g_mirror_softc *sc)
2127{
2128	struct g_mirror_disk *disk;
2129	struct g_provider *pp, *dp;
2130
2131	sx_assert(&sc->sc_lock, SX_LOCKED);
2132
2133	g_topology_lock();
2134	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
2135	pp->flags |= G_PF_DIRECT_RECEIVE;
2136	pp->mediasize = sc->sc_mediasize;
2137	pp->sectorsize = sc->sc_sectorsize;
2138	pp->stripesize = 0;
2139	pp->stripeoffset = 0;
2140
2141	/* Splitting of unmapped BIOs could work but isn't implemented for now. */
2142	if (sc->sc_balance != G_MIRROR_BALANCE_SPLIT)
2143		pp->flags |= G_PF_ACCEPT_UNMAPPED;
2144
2145	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2146		if (disk->d_consumer && disk->d_consumer->provider) {
2147			dp = disk->d_consumer->provider;
2148			if (dp->stripesize > pp->stripesize) {
2149				pp->stripesize = dp->stripesize;
2150				pp->stripeoffset = dp->stripeoffset;
2151			}
2152			/* A provider underneath us doesn't support unmapped I/O. */
2153			if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) {
2154				G_MIRROR_DEBUG(0, "Cancelling unmapped "
2155				    "because of %s.", dp->name);
2156				pp->flags &= ~G_PF_ACCEPT_UNMAPPED;
2157			}
2158		}
2159	}
2160	pp->private = sc;
2161	sc->sc_refcnt++;
2162	sc->sc_provider = pp;
2163	g_error_provider(pp, 0);
2164	g_topology_unlock();
2165	G_MIRROR_DEBUG(0, "Device %s launched (%u/%u).", pp->name,
2166	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE), sc->sc_ndisks);
2167	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2168		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2169			g_mirror_sync_start(disk);
2170	}
2171}
2172
2173static void
2174g_mirror_destroy_provider(struct g_mirror_softc *sc)
2175{
2176	struct g_mirror_disk *disk;
2177	struct bio *bp;
2178
2179	g_topology_assert_not();
2180	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
2181	    sc->sc_name));
2182
2183	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2184		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2185			g_mirror_sync_stop(disk, 1);
2186	}
2187
2188	g_topology_lock();
2189	g_error_provider(sc->sc_provider, ENXIO);
2190	mtx_lock(&sc->sc_queue_mtx);
2191	while ((bp = bioq_takefirst(&sc->sc_queue)) != NULL) {
2192		/*
2193		 * Abort any pending I/O that wasn't generated by us.
2194		 * Synchronization requests and requests destined for individual
2195		 * mirror components can be destroyed immediately.
2196		 */
2197		if (bp->bio_to == sc->sc_provider &&
2198		    bp->bio_from->geom != sc->sc_sync.ds_geom) {
2199			g_io_deliver(bp, ENXIO);
2200		} else {
2201			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
2202				free(bp->bio_data, M_MIRROR);
2203			g_destroy_bio(bp);
2204		}
2205	}
2206	mtx_unlock(&sc->sc_queue_mtx);
2207	g_wither_provider(sc->sc_provider, ENXIO);
2208	sc->sc_provider = NULL;
2209	G_MIRROR_DEBUG(0, "Device %s: provider destroyed.", sc->sc_name);
2210	g_topology_unlock();
2211}
2212
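/*
 * Callout handler: force the device to start once the startup timeout
 * expires, even if not all components have appeared yet.
 */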
2213static void
2214g_mirror_go(void *arg)
2215{
2216	struct g_mirror_softc *sc;
2217
2218	sc = arg;
2219	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
2220	g_mirror_event_send(sc, 0,
2221	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
2222}
2223
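/*
 * Determine the initial state of a disk from its syncid: an equal
 * syncid yields ACTIVE, SYNCHRONIZING or STALE; a smaller one forces a
 * full resynchronization; a bigger one means the mirror was started
 * without its freshest disk, so the disk is destroyed.
 */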
2224static u_int
2225g_mirror_determine_state(struct g_mirror_disk *disk)
2226{
2227	struct g_mirror_softc *sc;
2228	u_int state;
2229
2230	sc = disk->d_softc;
2231	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
2232		if ((disk->d_flags &
2233		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0 &&
2234		    (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 ||
2235		     (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0)) {
2236			/* Disk does not need synchronization. */
2237			state = G_MIRROR_DISK_STATE_ACTIVE;
2238		} else {
2239			if ((sc->sc_flags &
2240			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2241			    (disk->d_flags &
2242			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2243				/*
2244				 * We can start synchronization from
2245				 * the stored offset.
2246				 */
2247				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2248			} else {
2249				state = G_MIRROR_DISK_STATE_STALE;
2250			}
2251		}
2252	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
2253		/*
2254		 * Reset all synchronization data for this disk,
2255		 * because even if it was synchronized, it was
2256		 * synchronized against disks with a different syncid.
2257		 */
2258		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2259		disk->d_sync.ds_offset = 0;
2260		disk->d_sync.ds_offset_done = 0;
2261		disk->d_sync.ds_syncid = sc->sc_syncid;
2262		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2263		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2264			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2265		} else {
2266			state = G_MIRROR_DISK_STATE_STALE;
2267		}
2268	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
2269		/*
2270		 * Not good, NOT GOOD!
2271		 * It means that the mirror was started on stale disks
2272		 * and a fresher disk has just arrived.
2273		 * If there were writes, the mirror is broken, sorry.
2274		 * The best choice here is to leave this disk alone
2275		 * and inform the user loudly.
2276		 */
2277		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
2278		    "disk (%s) arrived! It will not be connected to the "
2279		    "running device.", sc->sc_name,
2280		    g_mirror_get_diskname(disk));
2281		g_mirror_destroy_disk(disk);
2282		state = G_MIRROR_DISK_STATE_NONE;
2283		/* Return immediately, because disk was destroyed. */
2284		return (state);
2285	}
2286	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
2287	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
2288	return (state);
2289}
2290
2291/*
2292 * Update device state.
2293 */
2294static void
2295g_mirror_update_device(struct g_mirror_softc *sc, bool force)
2296{
2297	struct g_mirror_disk *disk;
2298	u_int state;
2299
2300	sx_assert(&sc->sc_lock, SX_XLOCKED);
2301
2302	switch (sc->sc_state) {
2303	case G_MIRROR_DEVICE_STATE_STARTING:
2304	    {
2305		struct g_mirror_disk *pdisk, *tdisk;
2306		u_int dirty, ndisks, genid, syncid;
2307		bool broken;
2308
2309		KASSERT(sc->sc_provider == NULL,
2310		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
2311		/*
2312		 * Are we ready? We are, if all disks are connected or
2313		 * if we have any disks and 'force' is true.
2314		 */
2315		ndisks = g_mirror_ndisks(sc, -1);
2316		if (sc->sc_ndisks == ndisks || (force && ndisks > 0)) {
2317			;
2318		} else if (ndisks == 0) {
2319			/*
2320			 * Disks went down in the starting phase, so destroy
2321			 * the device.
2322			 */
2323			callout_drain(&sc->sc_callout);
2324			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2325			G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
2326			    sc->sc_rootmount);
2327			root_mount_rel(sc->sc_rootmount);
2328			sc->sc_rootmount = NULL;
2329			return;
2330		} else {
2331			return;
2332		}
2333
2334		/*
2335		 * Activate all disks with the biggest syncid.
2336		 */
2337		if (force) {
2338			/*
2339			 * If 'force' is true, we have been called due to
2340			 * timeout, so don't bother canceling the timeout.
2341			 */
2342			ndisks = 0;
2343			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2344				if ((disk->d_flags &
2345				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
2346					ndisks++;
2347				}
2348			}
2349			if (ndisks == 0) {
2350				/* No valid disks found, destroy device. */
2351				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2352				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2353				    __LINE__, sc->sc_rootmount);
2354				root_mount_rel(sc->sc_rootmount);
2355				sc->sc_rootmount = NULL;
2356				return;
2357			}
2358		} else {
2359			/* Cancel timeout. */
2360			callout_drain(&sc->sc_callout);
2361		}
2362
2363		/*
2364		 * Find the biggest genid.
2365		 */
2366		genid = 0;
2367		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2368			if (disk->d_genid > genid)
2369				genid = disk->d_genid;
2370		}
2371		sc->sc_genid = genid;
2372		/*
2373		 * Remove all disks without the biggest genid.
2374		 */
2375		broken = false;
2376		LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
2377			if (disk->d_genid < genid) {
2378				G_MIRROR_DEBUG(0,
2379				    "Component %s (device %s) broken, skipping.",
2380				    g_mirror_get_diskname(disk), sc->sc_name);
2381				g_mirror_destroy_disk(disk);
2382				/*
2383				 * Bump the syncid in case we discover a healthy
2384				 * replacement disk after starting the mirror.
2385				 */
2386				broken = true;
2387			}
2388		}
2389
2390		/*
2391		 * Find the biggest syncid.
2392		 */
2393		syncid = 0;
2394		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2395			if (disk->d_sync.ds_syncid > syncid)
2396				syncid = disk->d_sync.ds_syncid;
2397		}
2398
2399		/*
2400		 * Here we need to look for dirty disks and if all disks
2401		 * with the biggest syncid are dirty, we have to choose the
2402		 * one with the biggest priority and rebuild the rest.
2403		 */
2404		/*
2405		 * Find the number of dirty disks with the biggest syncid.
2406		 * Find the number of disks with the biggest syncid.
2407		 * While here, find a disk with the biggest priority.
2408		 */
2409		dirty = ndisks = 0;
2410		pdisk = NULL;
2411		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2412			if (disk->d_sync.ds_syncid != syncid)
2413				continue;
2414			if ((disk->d_flags &
2415			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2416				continue;
2417			}
2418			ndisks++;
2419			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2420				dirty++;
2421				if (pdisk == NULL ||
2422				    pdisk->d_priority < disk->d_priority) {
2423					pdisk = disk;
2424				}
2425			}
2426		}
2427		if (dirty == 0) {
2428			/* No dirty disks at all, great. */
2429		} else if (dirty == ndisks) {
2430			/*
2431			 * Force synchronization for all dirty disks except one
2432			 * with the biggest priority.
2433			 */
2434			KASSERT(pdisk != NULL, ("pdisk == NULL"));
2435			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
2436			    "master disk for synchronization.",
2437			    g_mirror_get_diskname(pdisk), sc->sc_name);
2438			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2439				if (disk->d_sync.ds_syncid != syncid)
2440					continue;
2441				if ((disk->d_flags &
2442				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2443					continue;
2444				}
2445				KASSERT((disk->d_flags &
2446				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
2447				    ("Disk %s isn't marked as dirty.",
2448				    g_mirror_get_diskname(disk)));
2449				/* Skip the disk with the biggest priority. */
2450				if (disk == pdisk)
2451					continue;
2452				disk->d_sync.ds_syncid = 0;
2453			}
2454		} else if (dirty < ndisks) {
2455			/*
2456			 * Force synchronization for all dirty disks.
2457			 * We have some non-dirty disks.
2458			 */
2459			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2460				if (disk->d_sync.ds_syncid != syncid)
2461					continue;
2462				if ((disk->d_flags &
2463				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2464					continue;
2465				}
2466				if ((disk->d_flags &
2467				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
2468					continue;
2469				}
2470				disk->d_sync.ds_syncid = 0;
2471			}
2472		}
2473
2474		/* Reset hint. */
2475		sc->sc_hint = NULL;
2476		sc->sc_syncid = syncid;
2477		if (force || broken) {
2478			/* Remember to bump syncid on first write. */
2479			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2480		}
2481		state = G_MIRROR_DEVICE_STATE_RUNNING;
2482		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
2483		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
2484		    g_mirror_device_state2str(state));
2485		sc->sc_state = state;
2486		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2487			state = g_mirror_determine_state(disk);
2488			g_mirror_event_send(disk, state,
2489			    G_MIRROR_EVENT_DONTWAIT);
2490			if (state == G_MIRROR_DISK_STATE_STALE)
2491				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2492		}
2493		break;
2494	    }
2495	case G_MIRROR_DEVICE_STATE_RUNNING:
2496		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
2497		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2498			/*
2499			 * No usable disks, so destroy the device.
2500			 */
2501			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2502			break;
2503		} else if (g_mirror_ndisks(sc,
2504		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
2505		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2506			/*
2507			 * We have active disks, launch provider if it doesn't
2508			 * exist.
2509			 */
2510			if (sc->sc_provider == NULL)
2511				g_mirror_launch_provider(sc);
2512			if (sc->sc_rootmount != NULL) {
2513				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2514				    __LINE__, sc->sc_rootmount);
2515				root_mount_rel(sc->sc_rootmount);
2516				sc->sc_rootmount = NULL;
2517			}
2518		}
2519		/*
2520		 * Genid should be bumped immediately, so do it here.
2521		 */
2522		if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID) != 0) {
2523			sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID;
2524			g_mirror_bump_genid(sc);
2525		}
2526		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID_NOW) != 0) {
2527			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID_NOW;
2528			g_mirror_bump_syncid(sc);
2529		}
2530		break;
2531	default:
2532		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
2533		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
2534		break;
2535	}
2536}
2537
2538/*
2539 * Update disk state and device state if needed.
2540 */
2541#define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
2542	"Disk %s state changed from %s to %s (device %s).",		\
2543	g_mirror_get_diskname(disk),					\
2544	g_mirror_disk_state2str(disk->d_state),				\
2545	g_mirror_disk_state2str(state), sc->sc_name)
2546static int
2547g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
2548{
2549	struct g_mirror_softc *sc;
2550
2551	sc = disk->d_softc;
2552	sx_assert(&sc->sc_lock, SX_XLOCKED);
2553
2554again:
2555	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
2556	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
2557	    g_mirror_disk_state2str(state));
2558	switch (state) {
2559	case G_MIRROR_DISK_STATE_NEW:
2560		/*
2561		 * Possible scenarios:
2562		 * 1. A new disk arrives.
2563		 */
2564		/* Previous state should be NONE. */
2565		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
2566		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2567		    g_mirror_disk_state2str(disk->d_state)));
2568		DISK_STATE_CHANGED();
2569
2570		disk->d_state = state;
2571		if (LIST_EMPTY(&sc->sc_disks))
2572			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
2573		else {
2574			struct g_mirror_disk *dp;
2575
2576			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
2577				if (disk->d_priority >= dp->d_priority) {
2578					LIST_INSERT_BEFORE(dp, disk, d_next);
2579					dp = NULL;
2580					break;
2581				}
2582				if (LIST_NEXT(dp, d_next) == NULL)
2583					break;
2584			}
2585			if (dp != NULL)
2586				LIST_INSERT_AFTER(dp, disk, d_next);
2587		}
2588		G_MIRROR_DEBUG(1, "Device %s: provider %s detected.",
2589		    sc->sc_name, g_mirror_get_diskname(disk));
2590		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2591			break;
2592		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2593		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2594		    g_mirror_device_state2str(sc->sc_state),
2595		    g_mirror_get_diskname(disk),
2596		    g_mirror_disk_state2str(disk->d_state)));
2597		state = g_mirror_determine_state(disk);
2598		if (state != G_MIRROR_DISK_STATE_NONE)
2599			goto again;
2600		break;
2601	case G_MIRROR_DISK_STATE_ACTIVE:
2602		/*
2603		 * Possible scenarios:
2604		 * 1. A new disk does not need synchronization.
2605		 * 2. Synchronization process finished successfully.
2606		 */
2607		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2608		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2609		    g_mirror_device_state2str(sc->sc_state),
2610		    g_mirror_get_diskname(disk),
2611		    g_mirror_disk_state2str(disk->d_state)));
2612		/* Previous state should be NEW or SYNCHRONIZING. */
2613		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
2614		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2615		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2616		    g_mirror_disk_state2str(disk->d_state)));
2617		DISK_STATE_CHANGED();
2618
2619		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2620			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2621			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
2622			g_mirror_sync_stop(disk, 0);
2623		}
2624		disk->d_state = state;
2625		disk->d_sync.ds_offset = 0;
2626		disk->d_sync.ds_offset_done = 0;
2627		g_mirror_update_idle(sc, disk);
2628		g_mirror_update_metadata(disk);
2629		G_MIRROR_DEBUG(1, "Device %s: provider %s activated.",
2630		    sc->sc_name, g_mirror_get_diskname(disk));
2631		break;
2632	case G_MIRROR_DISK_STATE_STALE:
2633		/*
2634		 * Possible scenarios:
2635		 * 1. A stale disk was connected.
2636		 */
2637		/* Previous state should be NEW. */
2638		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2639		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2640		    g_mirror_disk_state2str(disk->d_state)));
2641		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2642		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2643		    g_mirror_device_state2str(sc->sc_state),
2644		    g_mirror_get_diskname(disk),
2645		    g_mirror_disk_state2str(disk->d_state)));
2646		/*
2647		 * STALE state is only possible if the device is marked
2648		 * NOAUTOSYNC.
2649		 */
2650		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
2651		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2652		    g_mirror_device_state2str(sc->sc_state),
2653		    g_mirror_get_diskname(disk),
2654		    g_mirror_disk_state2str(disk->d_state)));
2655		DISK_STATE_CHANGED();
2656
2657		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2658		disk->d_state = state;
2659		g_mirror_update_metadata(disk);
2660		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
2661		    sc->sc_name, g_mirror_get_diskname(disk));
2662		break;
2663	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
2664		/*
2665		 * Possible scenarios:
2666		 * 1. A disk which needs synchronization was connected.
2667		 */
2668		/* Previous state should be NEW. */
2669		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2670		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2671		    g_mirror_disk_state2str(disk->d_state)));
2672		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2673		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2674		    g_mirror_device_state2str(sc->sc_state),
2675		    g_mirror_get_diskname(disk),
2676		    g_mirror_disk_state2str(disk->d_state)));
2677		DISK_STATE_CHANGED();
2678
2679		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2680			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2681		disk->d_state = state;
2682		if (sc->sc_provider != NULL) {
2683			g_mirror_sync_start(disk);
2684			g_mirror_update_metadata(disk);
2685		}
2686		break;
2687	case G_MIRROR_DISK_STATE_DISCONNECTED:
2688		/*
2689		 * Possible scenarios:
2690		 * 1. Device wasn't running yet, but a disk disappeared.
2691		 * 2. Disk was active and disappeared.
2692		 * 3. Disk disappeared during the synchronization process.
2693		 */
2694		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
2695			/*
2696			 * Previous state should be ACTIVE, STALE or
2697			 * SYNCHRONIZING.
2698			 */
2699			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
2700			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
2701			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2702			    ("Wrong disk state (%s, %s).",
2703			    g_mirror_get_diskname(disk),
2704			    g_mirror_disk_state2str(disk->d_state)));
2705		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
2706			/* Previous state should be NEW. */
2707			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2708			    ("Wrong disk state (%s, %s).",
2709			    g_mirror_get_diskname(disk),
2710			    g_mirror_disk_state2str(disk->d_state)));
2711			/*
2712			 * Reset syncid bumping if the disk disappeared in STARTING
2713			 * state.
2714			 */
2715			if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0)
2716				sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
2717#ifdef	INVARIANTS
2718		} else {
2719			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
2720			    sc->sc_name,
2721			    g_mirror_device_state2str(sc->sc_state),
2722			    g_mirror_get_diskname(disk),
2723			    g_mirror_disk_state2str(disk->d_state)));
2724#endif
2725		}
2726		DISK_STATE_CHANGED();
2727		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
2728		    sc->sc_name, g_mirror_get_diskname(disk));
2729
2730		g_mirror_destroy_disk(disk);
2731		break;
2732	case G_MIRROR_DISK_STATE_DESTROY:
2733	    {
2734		int error;
2735
2736		error = g_mirror_clear_metadata(disk);
2737		if (error != 0) {
2738			G_MIRROR_DEBUG(0,
2739			    "Device %s: failed to clear metadata on %s: %d.",
2740			    sc->sc_name, g_mirror_get_diskname(disk), error);
2741			break;
2742		}
2743		DISK_STATE_CHANGED();
2744		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
2745		    sc->sc_name, g_mirror_get_diskname(disk));
2746
2747		g_mirror_destroy_disk(disk);
2748		sc->sc_ndisks--;
2749		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2750			g_mirror_update_metadata(disk);
2751		}
2752		break;
2753	    }
2754	default:
2755		KASSERT(1 == 0, ("Unknown state (%u).", state));
2756		break;
2757	}
2758	return (0);
2759}
2760#undef	DISK_STATE_CHANGED
2761
2762int
2763g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
2764{
2765	struct g_provider *pp;
2766	u_char *buf;
2767	int error;
2768
2769	g_topology_assert();
2770
2771	error = g_access(cp, 1, 0, 0);
2772	if (error != 0)
2773		return (error);
2774	pp = cp->provider;
2775	g_topology_unlock();
2776	/* Metadata is stored in the last sector. */
2777	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
2778	    &error);
2779	g_topology_lock();
2780	g_access(cp, -1, 0, 0);
2781	if (buf == NULL) {
2782		G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).",
2783		    cp->provider->name, error);
2784		return (error);
2785	}
2786
2787	/* Decode metadata. */
2788	error = mirror_metadata_decode(buf, md);
2789	g_free(buf);
2790	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
2791		return (EINVAL);
2792	if (md->md_version > G_MIRROR_VERSION) {
2793		G_MIRROR_DEBUG(0,
2794		    "Kernel module is too old to handle metadata from %s.",
2795		    cp->provider->name);
2796		return (EINVAL);
2797	}
2798	if (error != 0) {
2799		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
2800		    cp->provider->name);
2801		return (error);
2802	}
2803
2804	return (0);
2805}
2806
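/*
 * Validate on-disk metadata against the device configuration: the disk
 * ID must be unique, and the component count, slice, balance algorithm,
 * sector size and flag bits must all be consistent.
 */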
2807static int
2808g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2809    struct g_mirror_metadata *md)
2810{
2811
2812	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2813		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2814		    pp->name, md->md_did);
2815		return (EEXIST);
2816	}
2817	if (md->md_all != sc->sc_ndisks) {
2818		G_MIRROR_DEBUG(1,
2819		    "Invalid '%s' field on disk %s (device %s), skipping.",
2820		    "md_all", pp->name, sc->sc_name);
2821		return (EINVAL);
2822	}
2823	if (md->md_slice != sc->sc_slice) {
2824		G_MIRROR_DEBUG(1,
2825		    "Invalid '%s' field on disk %s (device %s), skipping.",
2826		    "md_slice", pp->name, sc->sc_name);
2827		return (EINVAL);
2828	}
2829	if (md->md_balance != sc->sc_balance) {
2830		G_MIRROR_DEBUG(1,
2831		    "Invalid '%s' field on disk %s (device %s), skipping.",
2832		    "md_balance", pp->name, sc->sc_name);
2833		return (EINVAL);
2834	}
2835#if 0
2836	if (md->md_mediasize != sc->sc_mediasize) {
2837		G_MIRROR_DEBUG(1,
2838		    "Invalid '%s' field on disk %s (device %s), skipping.",
2839		    "md_mediasize", pp->name, sc->sc_name);
2840		return (EINVAL);
2841	}
2842#endif
2843	if (sc->sc_mediasize > pp->mediasize) {
2844		G_MIRROR_DEBUG(1,
2845		    "Invalid size of disk %s (device %s), skipping.", pp->name,
2846		    sc->sc_name);
2847		return (EINVAL);
2848	}
2849	if (md->md_sectorsize != sc->sc_sectorsize) {
2850		G_MIRROR_DEBUG(1,
2851		    "Invalid '%s' field on disk %s (device %s), skipping.",
2852		    "md_sectorsize", pp->name, sc->sc_name);
2853		return (EINVAL);
2854	}
2855	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2856		G_MIRROR_DEBUG(1,
2857		    "Invalid sector size of disk %s (device %s), skipping.",
2858		    pp->name, sc->sc_name);
2859		return (EINVAL);
2860	}
2861	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
2862		G_MIRROR_DEBUG(1,
2863		    "Invalid device flags on disk %s (device %s), skipping.",
2864		    pp->name, sc->sc_name);
2865		return (EINVAL);
2866	}
2867	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
2868		G_MIRROR_DEBUG(1,
2869		    "Invalid disk flags on disk %s (device %s), skipping.",
2870		    pp->name, sc->sc_name);
2871		return (EINVAL);
2872	}
2873	return (0);
2874}
2875
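/*
 * Add a new component to the device after validating its metadata, and
 * upgrade metadata written by older versions of the module.
 */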
2876int
2877g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
2878    struct g_mirror_metadata *md)
2879{
2880	struct g_mirror_disk *disk;
2881	int error;
2882
2883	g_topology_assert_not();
2884	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
2885
2886	error = g_mirror_check_metadata(sc, pp, md);
2887	if (error != 0)
2888		return (error);
2889	if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING &&
2890	    md->md_genid < sc->sc_genid) {
2891		G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.",
2892		    pp->name, sc->sc_name);
2893		return (EINVAL);
2894	}
2895	disk = g_mirror_init_disk(sc, pp, md, &error);
2896	if (disk == NULL)
2897		return (error);
2898	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
2899	    G_MIRROR_EVENT_WAIT);
2900	if (error != 0)
2901		return (error);
2902	if (md->md_version < G_MIRROR_VERSION) {
2903		G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).",
2904		    pp->name, md->md_version, G_MIRROR_VERSION);
2905		g_mirror_update_metadata(disk);
2906	}
2907	return (0);
2908}
2909
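/*
 * GEOM event handler: perform the destruction that was deferred until
 * the provider's last close (the CLOSEWAIT case).
 */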
2910static void
2911g_mirror_destroy_delayed(void *arg, int flag)
2912{
2913	struct g_mirror_softc *sc;
2914	int error;
2915
2916	if (flag == EV_CANCEL) {
2917		G_MIRROR_DEBUG(1, "Destroying canceled.");
2918		return;
2919	}
2920	sc = arg;
2921	g_topology_unlock();
2922	sx_xlock(&sc->sc_lock);
2923	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) == 0,
2924	    ("DESTROY flag set on %s.", sc->sc_name));
2925	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0,
2926	    ("CLOSEWAIT flag not set on %s.", sc->sc_name));
2927	G_MIRROR_DEBUG(1, "Destroying %s (delayed).", sc->sc_name);
2928	error = g_mirror_destroy(sc, G_MIRROR_DESTROY_SOFT);
2929	if (error != 0) {
2930		G_MIRROR_DEBUG(0, "Cannot destroy %s (error=%d).",
2931		    sc->sc_name, error);
2932		sx_xunlock(&sc->sc_lock);
2933	}
2934	g_topology_lock();
2935}
2936
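/*
 * Access method for the mirror provider: track the open count in
 * sc_provider_open and schedule the delayed destruction once a
 * CLOSEWAIT device has been fully closed.
 */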
2937static int
2938g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
2939{
2940	struct g_mirror_softc *sc;
2941	int error = 0;
2942
2943	g_topology_assert();
2944	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
2945	    acw, ace);
2946
2947	sc = pp->private;
2948	KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name));
2949
2950	g_topology_unlock();
2951	sx_xlock(&sc->sc_lock);
2952	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0 ||
2953	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 ||
2954	    LIST_EMPTY(&sc->sc_disks)) {
2955		if (acr > 0 || acw > 0 || ace > 0)
2956			error = ENXIO;
2957		goto end;
2958	}
2959	sc->sc_provider_open += acr + acw + ace;
2960	if (pp->acw + acw == 0)
2961		g_mirror_idle(sc, 0);
2962	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 &&
2963	    sc->sc_provider_open == 0)
2964		g_post_event(g_mirror_destroy_delayed, sc, M_WAITOK, sc, NULL);
2965end:
2966	sx_xunlock(&sc->sc_lock);
2967	g_topology_lock();
2968	return (error);
2969}
2970
2971struct g_geom *
2972g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md,
2973    u_int type)
2974{
2975	struct g_mirror_softc *sc;
2976	struct g_geom *gp;
2977	int error, timeout;
2978
2979	g_topology_assert();
2980	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
2981	    md->md_mid);
2982
2983	/* At least one disk is required. */
2984	if (md->md_all < 1)
2985		return (NULL);
2986	/*
2987	 * Action geom.
2988	 */
2989	gp = g_new_geomf(mp, "%s", md->md_name);
2990	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
2991	gp->start = g_mirror_start;
2992	gp->orphan = g_mirror_orphan;
2993	gp->access = g_mirror_access;
2994	gp->dumpconf = g_mirror_dumpconf;
2995
2996	sc->sc_type = type;
2997	sc->sc_id = md->md_mid;
2998	sc->sc_slice = md->md_slice;
2999	sc->sc_balance = md->md_balance;
3000	sc->sc_mediasize = md->md_mediasize;
3001	sc->sc_sectorsize = md->md_sectorsize;
3002	sc->sc_ndisks = md->md_all;
3003	sc->sc_flags = md->md_mflags;
3004	sc->sc_bump_id = 0;
3005	sc->sc_idle = 1;
3006	sc->sc_last_write = time_uptime;
3007	sc->sc_writes = 0;
3008	sc->sc_refcnt = 1;
3009	sx_init(&sc->sc_lock, "gmirror:lock");
3010	bioq_init(&sc->sc_queue);
3011	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
3012	bioq_init(&sc->sc_regular_delayed);
3013	bioq_init(&sc->sc_inflight);
3014	bioq_init(&sc->sc_sync_delayed);
3015	LIST_INIT(&sc->sc_disks);
3016	TAILQ_INIT(&sc->sc_events);
3017	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
3018	callout_init(&sc->sc_callout, 1);
3019	mtx_init(&sc->sc_done_mtx, "gmirror:done", NULL, MTX_DEF);
3020	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
3021	gp->softc = sc;
3022	sc->sc_geom = gp;
3023	sc->sc_provider = NULL;
3024	sc->sc_provider_open = 0;
3025	/*
3026	 * Synchronization geom.
3027	 */
3028	gp = g_new_geomf(mp, "%s.sync", md->md_name);
3029	gp->softc = sc;
3030	gp->orphan = g_mirror_orphan;
3031	sc->sc_sync.ds_geom = gp;
3032	sc->sc_sync.ds_ndisks = 0;
3033	error = kproc_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
3034	    "g_mirror %s", md->md_name);
3035	if (error != 0) {
3036		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
3037		    sc->sc_name);
3038		g_destroy_geom(sc->sc_sync.ds_geom);
3039		g_destroy_geom(sc->sc_geom);
3040		g_mirror_free_device(sc);
3041		return (NULL);
3042	}
3043
3044	G_MIRROR_DEBUG(1, "Device %s created (%u components, id=%u).",
3045	    sc->sc_name, sc->sc_ndisks, sc->sc_id);
3046
3047	sc->sc_rootmount = root_mount_hold("GMIRROR");
3048	G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount);
3049	/*
3050	 * Run timeout.
3051	 */
3052	timeout = g_mirror_timeout * hz;
3053	callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
3054	return (sc->sc_geom);
3055}
3056
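/*
 * Destroy the device with the requested strictness: SOFT fails with
 * EBUSY if the provider is open, DELAYED defers destruction until the
 * last close, and HARD proceeds regardless.
 */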
3057int
3058g_mirror_destroy(struct g_mirror_softc *sc, int how)
3059{
3060	struct g_mirror_disk *disk;
3061
3062	g_topology_assert_not();
3063	sx_assert(&sc->sc_lock, SX_XLOCKED);
3064
3065	if (sc->sc_provider_open != 0) {
3066		switch (how) {
3067		case G_MIRROR_DESTROY_SOFT:
3068			G_MIRROR_DEBUG(1,
3069			    "Device %s is still open (%d).", sc->sc_name,
3070			    sc->sc_provider_open);
3071			return (EBUSY);
3072		case G_MIRROR_DESTROY_DELAYED:
3073			G_MIRROR_DEBUG(1,
3074			    "Device %s will be destroyed on last close.",
3075			    sc->sc_name);
3076			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
3077				if (disk->d_state ==
3078				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3079					g_mirror_sync_stop(disk, 1);
3080				}
3081			}
3082			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_CLOSEWAIT;
3083			return (EBUSY);
3084		case G_MIRROR_DESTROY_HARD:
3085			G_MIRROR_DEBUG(1, "Device %s is still open, so it "
3086			    "can't be definitely removed.", sc->sc_name);
3087		}
3088	}
3089
3090	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
3091		sx_xunlock(&sc->sc_lock);
3092		return (0);
3093	}
3094	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
3095	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DRAIN;
3096	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
3097	sx_xunlock(&sc->sc_lock);
3098	mtx_lock(&sc->sc_queue_mtx);
3099	wakeup(sc);
3100	mtx_unlock(&sc->sc_queue_mtx);
3101	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
3102	while (sc->sc_worker != NULL)
3103		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
3104	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
3105	sx_xlock(&sc->sc_lock);
3106	g_mirror_destroy_device(sc);
3107	return (0);
3108}
3109
3110static void
3111g_mirror_taste_orphan(struct g_consumer *cp)
3112{
3113
3114	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
3115	    cp->provider->name));
3116}
3117
3118static struct g_geom *
3119g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
3120{
3121	struct g_mirror_metadata md;
3122	struct g_mirror_softc *sc;
3123	struct g_consumer *cp;
3124	struct g_geom *gp;
3125	int error;
3126
3127	g_topology_assert();
3128	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
3129	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
3130
3131	gp = g_new_geomf(mp, "mirror:taste");
3132	/*
3133	 * This orphan function should never be called.
3134	 */
3135	gp->orphan = g_mirror_taste_orphan;
3136	cp = g_new_consumer(gp);
3137	g_attach(cp, pp);
3138	error = g_mirror_read_metadata(cp, &md);
3139	g_detach(cp);
3140	g_destroy_consumer(cp);
3141	g_destroy_geom(gp);
3142	if (error != 0)
3143		return (NULL);
3144	gp = NULL;
3145
3146	if (md.md_provider[0] != '\0' &&
3147	    !g_compare_names(md.md_provider, pp->name))
3148		return (NULL);
3149	if (md.md_provsize != 0 && md.md_provsize != pp->mediasize)
3150		return (NULL);
3151	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
3152		G_MIRROR_DEBUG(0,
3153		    "Device %s: provider %s marked as inactive, skipping.",
3154		    md.md_name, pp->name);
3155		return (NULL);
3156	}
3157	if (g_mirror_debug >= 2)
3158		mirror_metadata_dump(&md);
3159
3160	/*
3161	 * Let's check if the device already exists.
3162	 */
3163	sc = NULL;
3164	LIST_FOREACH(gp, &mp->geom, geom) {
3165		sc = gp->softc;
3166		if (sc == NULL)
3167			continue;
3168		if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
3169			continue;
3170		if (sc->sc_sync.ds_geom == gp)
3171			continue;
3172		if (strcmp(md.md_name, sc->sc_name) != 0)
3173			continue;
3174		if (md.md_mid != sc->sc_id) {
3175			G_MIRROR_DEBUG(0, "Device %s already configured.",
3176			    sc->sc_name);
3177			return (NULL);
3178		}
3179		break;
3180	}
3181	if (gp == NULL) {
3182		gp = g_mirror_create(mp, &md, G_MIRROR_TYPE_AUTOMATIC);
3183		if (gp == NULL) {
3184			G_MIRROR_DEBUG(0, "Cannot create device %s.",
3185			    md.md_name);
3186			return (NULL);
3187		}
3188		sc = gp->softc;
3189	}
3190	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
3191	g_topology_unlock();
3192	sx_xlock(&sc->sc_lock);
3193	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_TASTING;
3194	error = g_mirror_add_disk(sc, pp, &md);
3195	if (error != 0) {
3196		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
3197		    pp->name, gp->name, error);
3198		if (LIST_EMPTY(&sc->sc_disks)) {
3199			g_cancel_event(sc);
3200			g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3201			g_topology_lock();
3202			return (NULL);
3203		}
3204		gp = NULL;
3205	}
3206	sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_TASTING;
3207	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
3208		g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3209		g_topology_lock();
3210		return (NULL);
3211	}
3212	sx_xunlock(&sc->sc_lock);
3213	g_topology_lock();
3214	return (gp);
3215}
3216
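/*
 * Resize method: a component's provider changed size, so rewrite this
 * disk's metadata (kept in the provider's last sector).
 */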
3217static void
3218g_mirror_resize(struct g_consumer *cp)
3219{
3220	struct g_mirror_disk *disk;
3221
3222	g_topology_assert();
3223	g_trace(G_T_TOPOLOGY, "%s(%s)", __func__, cp->provider->name);
3224
3225	disk = cp->private;
3226	if (disk == NULL)
3227		return;
3228	g_topology_unlock();
3229	g_mirror_update_metadata(disk);
3230	g_topology_lock();
3231}
3232
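/*
 * Class destroy_geom method: attempt a soft destroy of the device
 * behind the given geom.
 */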
3233static int
3234g_mirror_destroy_geom(struct gctl_req *req __unused,
3235    struct g_class *mp __unused, struct g_geom *gp)
3236{
3237	struct g_mirror_softc *sc;
3238	int error;
3239
3240	g_topology_unlock();
3241	sc = gp->softc;
3242	sx_xlock(&sc->sc_lock);
3243	g_cancel_event(sc);
3244	error = g_mirror_destroy(gp->softc, G_MIRROR_DESTROY_SOFT);
3245	if (error != 0)
3246		sx_xunlock(&sc->sc_lock);
3247	g_topology_lock();
3248	return (error);
3249}
3250
3251static void
3252g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
3253    struct g_consumer *cp, struct g_provider *pp)
3254{
3255	struct g_mirror_softc *sc;
3256
3257	g_topology_assert();
3258
3259	sc = gp->softc;
3260	if (sc == NULL)
3261		return;
3262	/* Skip synchronization geom. */
3263	if (gp == sc->sc_sync.ds_geom)
3264		return;
3265	if (pp != NULL) {
3266		/* Nothing here. */
3267	} else if (cp != NULL) {
3268		struct g_mirror_disk *disk;
3269
3270		disk = cp->private;
3271		if (disk == NULL)
3272			return;
3273		g_topology_unlock();
3274		sx_xlock(&sc->sc_lock);
3275		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
3276		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3277			sbuf_printf(sb, "%s<Synchronized>", indent);
3278			if (disk->d_sync.ds_offset == 0)
3279				sbuf_printf(sb, "0%%");
3280			else {
3281				sbuf_printf(sb, "%u%%",
3282				    (u_int)((disk->d_sync.ds_offset * 100) /
3283				    sc->sc_provider->mediasize));
3284			}
3285			sbuf_printf(sb, "</Synchronized>\n");
3286			if (disk->d_sync.ds_offset > 0) {
3287				sbuf_printf(sb, "%s<BytesSynced>%jd"
3288				    "</BytesSynced>\n", indent,
3289				    (intmax_t)disk->d_sync.ds_offset);
3290			}
3291		}
3292		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
3293		    disk->d_sync.ds_syncid);
3294		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent,
3295		    disk->d_genid);
3296		sbuf_printf(sb, "%s<Flags>", indent);
3297		if (disk->d_flags == 0)
3298			sbuf_printf(sb, "NONE");
3299		else {
3300			int first = 1;
3301
3302#define	ADD_FLAG(flag, name)	do {					\
3303	if ((disk->d_flags & (flag)) != 0) {				\
3304		if (!first)						\
3305			sbuf_printf(sb, ", ");				\
3306		else							\
3307			first = 0;					\
3308		sbuf_printf(sb, name);					\
3309	}								\
3310} while (0)
3311			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
3312			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
3313			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
3314			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
3315			    "SYNCHRONIZING");
3316			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
3317			ADD_FLAG(G_MIRROR_DISK_FLAG_BROKEN, "BROKEN");
3318#undef	ADD_FLAG
3319		}
3320		sbuf_printf(sb, "</Flags>\n");
3321		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
3322		    disk->d_priority);
3323		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
3324		    g_mirror_disk_state2str(disk->d_state));
3325		sx_xunlock(&sc->sc_lock);
3326		g_topology_lock();
3327	} else {
3328		g_topology_unlock();
3329		sx_xlock(&sc->sc_lock);
3330		sbuf_printf(sb, "%s<Type>", indent);
3331		switch (sc->sc_type) {
3332		case G_MIRROR_TYPE_AUTOMATIC:
3333			sbuf_printf(sb, "AUTOMATIC");
3334			break;
3335		case G_MIRROR_TYPE_MANUAL:
3336			sbuf_printf(sb, "MANUAL");
3337			break;
3338		default:
3339			sbuf_printf(sb, "UNKNOWN");
3340			break;
3341		}
3342		sbuf_printf(sb, "</Type>\n");
3343		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
3344		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
3345		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid);
3346		sbuf_printf(sb, "%s<Flags>", indent);
3347		if (sc->sc_flags == 0)
3348			sbuf_printf(sb, "NONE");
3349		else {
3350			int first = 1;
3351
3352#define	ADD_FLAG(flag, name)	do {					\
3353	if ((sc->sc_flags & (flag)) != 0) {				\
3354		if (!first)						\
3355			sbuf_printf(sb, ", ");				\
3356		else							\
3357			first = 0;					\
3358		sbuf_printf(sb, name);					\
3359	}								\
3360} while (0)
3361			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC");
3362			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
3363#undef	ADD_FLAG
3364		}
3365		sbuf_printf(sb, "</Flags>\n");
3366		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
3367		    (u_int)sc->sc_slice);
3368		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
3369		    balance_name(sc->sc_balance));
3370		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
3371		    sc->sc_ndisks);
3372		sbuf_printf(sb, "%s<State>", indent);
3373		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
3374			sbuf_printf(sb, "%s", "STARTING");
3375		else if (sc->sc_ndisks ==
3376		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
3377			sbuf_printf(sb, "%s", "COMPLETE");
3378		else
3379			sbuf_printf(sb, "%s", "DEGRADED");
3380		sbuf_printf(sb, "</State>\n");
3381		sx_xunlock(&sc->sc_lock);
3382		g_topology_lock();
3383	}
3384}
3385
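/*
 * shutdown_post_sync handler: on clean shutdown, idle every mirror so
 * components are marked clean, then request delayed destruction.
 */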
3386static void
3387g_mirror_shutdown_post_sync(void *arg, int howto)
3388{
3389	struct g_class *mp;
3390	struct g_geom *gp, *gp2;
3391	struct g_mirror_softc *sc;
3392	int error;
3393
3394	if (panicstr != NULL)
3395		return;
3396
3397	mp = arg;
3398	g_topology_lock();
3399	g_mirror_shutdown = 1;
3400	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
3401		if ((sc = gp->softc) == NULL)
3402			continue;
3403		/* Skip synchronization geom. */
3404		if (gp == sc->sc_sync.ds_geom)
3405			continue;
3406		g_topology_unlock();
3407		sx_xlock(&sc->sc_lock);
3408		g_mirror_idle(sc, -1);
3409		g_cancel_event(sc);
3410		error = g_mirror_destroy(sc, G_MIRROR_DESTROY_DELAYED);
3411		if (error != 0)
3412			sx_xunlock(&sc->sc_lock);
3413		g_topology_lock();
3414	}
3415	g_topology_unlock();
3416}
3417
3418static void
3419g_mirror_init(struct g_class *mp)
3420{
3421
3422	g_mirror_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync,
3423	    g_mirror_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST);
3424	if (g_mirror_post_sync == NULL)
3425		G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event.");
3426}
3427
3428static void
3429g_mirror_fini(struct g_class *mp)
3430{
3431
3432	if (g_mirror_post_sync != NULL)
3433		EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_post_sync);
3434}
3435
3436DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
3437