/*-
 * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/geom/mirror/g_mirror.c 327804 2018-01-11 00:54:54Z markj $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fail.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/bio.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/eventhandler.h>
#include <vm/uma.h>
#include <geom/geom.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/sched.h>
#include <geom/mirror/g_mirror.h>

FEATURE(geom_mirror, "GEOM mirroring support");

static MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data");

SYSCTL_DECL(_kern_geom);
static SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0,
    "GEOM_MIRROR stuff");
int g_mirror_debug = 0;
SYSCTL_INT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RWTUN, &g_mirror_debug, 0,
    "Debug level");
static u_int g_mirror_timeout = 4;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RWTUN, &g_mirror_timeout,
    0, "Time to wait on all mirror components");
static u_int g_mirror_idletime = 5;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RWTUN,
    &g_mirror_idletime, 0, "Mark components as clean when idling");
static u_int g_mirror_disconnect_on_failure = 1;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, disconnect_on_failure, CTLFLAG_RWTUN,
    &g_mirror_disconnect_on_failure, 0, "Disconnect component on I/O failure.");
static u_int g_mirror_syncreqs = 2;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_requests, CTLFLAG_RDTUN,
    &g_mirror_syncreqs, 0, "Parallel synchronization I/O requests.");
static u_int g_mirror_sync_period = 5;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_update_period, CTLFLAG_RWTUN,
    &g_mirror_sync_period, 0,
    "Metadata update period during synchronization, in seconds");
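
/*
 * All of the knobs above live under kern.geom.mirror and are loader
 * tunables (e.g. kern.geom.mirror.debug=2 in loader.conf(5)); those marked
 * CTLFLAG_RWTUN can also be changed at runtime with sysctl(8), while
 * sync_requests (CTLFLAG_RDTUN) is read-only after boot.
 */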

#define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
} while (0)

static eventhandler_tag g_mirror_post_sync = NULL;
static int g_mirror_shutdown = 0;

static g_ctl_destroy_geom_t g_mirror_destroy_geom;
static g_taste_t g_mirror_taste;
static g_init_t g_mirror_init;
static g_fini_t g_mirror_fini;
static g_provgone_t g_mirror_providergone;
static g_resize_t g_mirror_resize;

struct g_class g_mirror_class = {
	.name = G_MIRROR_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_mirror_config,
	.taste = g_mirror_taste,
	.destroy_geom = g_mirror_destroy_geom,
	.init = g_mirror_init,
	.fini = g_mirror_fini,
	.providergone = g_mirror_providergone,
	.resize = g_mirror_resize
};


static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
static void g_mirror_update_device(struct g_mirror_softc *sc, bool force);
static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
static void g_mirror_register_request(struct bio *bp);
static void g_mirror_sync_release(struct g_mirror_softc *sc);


static const char *
g_mirror_disk_state2str(int state)
{

	switch (state) {
	case G_MIRROR_DISK_STATE_NONE:
		return ("NONE");
	case G_MIRROR_DISK_STATE_NEW:
		return ("NEW");
	case G_MIRROR_DISK_STATE_ACTIVE:
		return ("ACTIVE");
	case G_MIRROR_DISK_STATE_STALE:
		return ("STALE");
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		return ("SYNCHRONIZING");
	case G_MIRROR_DISK_STATE_DISCONNECTED:
		return ("DISCONNECTED");
	case G_MIRROR_DISK_STATE_DESTROY:
		return ("DESTROY");
	default:
		return ("INVALID");
	}
}

static const char *
g_mirror_device_state2str(int state)
{

	switch (state) {
	case G_MIRROR_DEVICE_STATE_STARTING:
		return ("STARTING");
	case G_MIRROR_DEVICE_STATE_RUNNING:
		return ("RUNNING");
	default:
		return ("INVALID");
	}
}

static const char *
g_mirror_get_diskname(struct g_mirror_disk *disk)
{

	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
		return ("[unknown]");
	return (disk->d_name);
}

/*
 * --- Events handling functions ---
 * Events in geom_mirror are used to maintain the state of disks and of the
 * device from a single thread, which simplifies locking.
 */
static void
g_mirror_event_free(struct g_mirror_event *ep)
{

	free(ep, M_MIRROR);
}

int
g_mirror_event_send(void *arg, int state, int flags)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	int error;

	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
		disk = NULL;
		sc = arg;
	} else {
		disk = arg;
		sc = disk->d_softc;
	}
	ep->e_disk = disk;
	ep->e_state = state;
	ep->e_flags = flags;
	ep->e_error = 0;
	mtx_lock(&sc->sc_events_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	mtx_lock(&sc->sc_queue_mtx);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
		return (0);
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
	sx_xunlock(&sc->sc_lock);
	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
		mtx_lock(&sc->sc_events_mtx);
		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
		    hz * 5);
	}
	error = ep->e_error;
	g_mirror_event_free(ep);
	sx_xlock(&sc->sc_lock);
	return (error);
}

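/*
 * Return the first event on the queue, or NULL if the queue is empty.
 */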
static struct g_mirror_event *
g_mirror_event_first(struct g_mirror_softc *sc)
{
	struct g_mirror_event *ep;

	mtx_lock(&sc->sc_events_mtx);
	ep = TAILQ_FIRST(&sc->sc_events);
	mtx_unlock(&sc->sc_events_mtx);
	return (ep);
}

static void
g_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep)
{

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_REMOVE(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
}

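/*
 * Discard all pending events associated with the given disk: free the ones
 * nobody waits for and wake up waiters with ECANCELED for the rest.
 */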
static void
g_mirror_event_cancel(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_mirror_event *ep, *tmpep;

	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
			continue;
		if (ep->e_disk != disk)
			continue;
		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			wakeup(ep);
		}
	}
	mtx_unlock(&sc->sc_events_mtx);
}

/*
 * Return the number of disks in the given state.
 * If state is equal to -1, count all connected disks.
 */
u_int
g_mirror_ndisks(struct g_mirror_softc *sc, int state)
{
	struct g_mirror_disk *disk;
	u_int n = 0;

	sx_assert(&sc->sc_lock, SX_LOCKED);

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (state == -1 || disk->d_state == state)
			n++;
	}
	return (n);
}

/*
 * Find a disk in the mirror by its disk ID.
 */
static struct g_mirror_disk *
g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
{
	struct g_mirror_disk *disk;

	sx_assert(&sc->sc_lock, SX_XLOCKED);

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_id == id)
			return (disk);
	}
	return (NULL);
}

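/*
 * Count the requests from the given consumer that still sit on the device
 * queue.
 */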
static u_int
g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
{
	struct bio *bp;
	u_int nreqs = 0;

	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_FOREACH(bp, &sc->sc_queue, bio_queue) {
		if (bp->bio_from == cp)
			nreqs++;
	}
	mtx_unlock(&sc->sc_queue_mtx);
	return (nreqs);
}

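/*
 * Return non-zero if the consumer still has I/O requests in flight or
 * queued, which means it cannot be destroyed yet.
 */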
static int
g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	if (cp->index > 0) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s exist, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	if (g_mirror_nrequests(sc, cp) > 0) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s in queue, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	return (0);
}

static void
g_mirror_destroy_consumer(void *arg, int flags __unused)
{
	struct g_consumer *cp;

	g_topology_assert();

	cp = arg;
	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}

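/*
 * Release our access counts and destroy the consumer, unless it is still
 * busy; destruction may be deferred until a pending retaste event has been
 * delivered (see the comment below).
 */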
static void
g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
{
	struct g_provider *pp;
	int retaste_wait;

	g_topology_assert();

	cp->private = NULL;
	if (g_mirror_is_busy(sc, cp))
		return;
	pp = cp->provider;
	retaste_wait = 0;
	if (cp->acw == 1) {
		if ((pp->geom->flags & G_GEOM_WITHER) == 0)
			retaste_wait = 1;
	}
	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
	    -cp->acw, -cp->ace, 0);
	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
	if (retaste_wait) {
		/*
		 * After the retaste event has been sent (inside g_access()),
		 * we can send an event to detach and destroy the consumer.
		 * A class which has a consumer attached to the given provider
		 * will not receive a retaste event for that provider.
		 * This is how we ignore retaste events for consumers opened
		 * for writing: the consumer is detached and destroyed only
		 * after the retaste event has been sent.
		 */
		g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL);
		return;
	}
	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}

static int
g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
{
	struct g_consumer *cp;
	int error;

	g_topology_assert_not();
	KASSERT(disk->d_consumer == NULL,
	    ("Disk already connected (device %s).", disk->d_softc->sc_name));

	g_topology_lock();
	cp = g_new_consumer(disk->d_softc->sc_geom);
	cp->flags |= G_CF_DIRECT_RECEIVE;
	error = g_attach(cp, pp);
	if (error != 0) {
		g_destroy_consumer(cp);
		g_topology_unlock();
		return (error);
	}
	error = g_access(cp, 1, 1, 1);
	if (error != 0) {
		g_detach(cp);
		g_destroy_consumer(cp);
		g_topology_unlock();
		G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).",
		    pp->name, error);
		return (error);
	}
	g_topology_unlock();
	disk->d_consumer = cp;
	disk->d_consumer->private = disk;
	disk->d_consumer->index = 0;

	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
	return (0);
}

static void
g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	g_topology_assert();

	if (cp == NULL)
		return;
	if (cp->provider != NULL)
		g_mirror_kill_consumer(sc, cp);
	else
		g_destroy_consumer(cp);
}

/*
 * Initialize a disk: allocate memory, create a consumer, attach it
 * to the provider and open access (r1w1e1) to it.
 */
static struct g_mirror_disk *
g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
    struct g_mirror_metadata *md, int *errorp)
{
	struct g_mirror_disk *disk;
	int i, error;

	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
	if (disk == NULL) {
		error = ENOMEM;
		goto fail;
	}
	disk->d_softc = sc;
	error = g_mirror_connect_disk(disk, pp);
	if (error != 0)
		goto fail;
	disk->d_id = md->md_did;
	disk->d_state = G_MIRROR_DISK_STATE_NONE;
	disk->d_priority = md->md_priority;
	disk->d_flags = md->md_dflags;
	error = g_getattr("GEOM::candelete", disk->d_consumer, &i);
	if (error == 0 && i != 0)
		disk->d_flags |= G_MIRROR_DISK_FLAG_CANDELETE;
	if (md->md_provider[0] != '\0')
		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
	disk->d_sync.ds_consumer = NULL;
	disk->d_sync.ds_offset = md->md_sync_offset;
	disk->d_sync.ds_offset_done = md->md_sync_offset;
	disk->d_sync.ds_update_ts = time_uptime;
	disk->d_genid = md->md_genid;
	disk->d_sync.ds_syncid = md->md_syncid;
	if (errorp != NULL)
		*errorp = 0;
	return (disk);
fail:
	if (errorp != NULL)
		*errorp = error;
	if (disk != NULL)
		free(disk, M_MIRROR);
	return (NULL);
}

static void
g_mirror_destroy_disk(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	LIST_REMOVE(disk, d_next);
	g_mirror_event_cancel(disk);
	if (sc->sc_hint == disk)
		sc->sc_hint = NULL;
	switch (disk->d_state) {
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		g_mirror_sync_stop(disk, 1);
		/* FALLTHROUGH */
	case G_MIRROR_DISK_STATE_NEW:
	case G_MIRROR_DISK_STATE_STALE:
	case G_MIRROR_DISK_STATE_ACTIVE:
		g_topology_lock();
		g_mirror_disconnect_consumer(sc, disk->d_consumer);
		g_topology_unlock();
		free(disk, M_MIRROR);
		break;
	default:
		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
	}
}

static void
g_mirror_free_device(struct g_mirror_softc *sc)
{

	mtx_destroy(&sc->sc_queue_mtx);
	mtx_destroy(&sc->sc_events_mtx);
	mtx_destroy(&sc->sc_done_mtx);
	sx_destroy(&sc->sc_lock);
	free(sc, M_MIRROR);
}

static void
g_mirror_providergone(struct g_provider *pp)
{
	struct g_mirror_softc *sc = pp->private;

	if ((--sc->sc_refcnt) == 0)
		g_mirror_free_device(sc);
}

static void
g_mirror_destroy_device(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	struct g_geom *gp;
	struct g_consumer *cp, *tmpcp;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	gp = sc->sc_geom;
	if (sc->sc_provider != NULL)
		g_mirror_destroy_provider(sc);
	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
	    disk = LIST_FIRST(&sc->sc_disks)) {
		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
		g_mirror_destroy_disk(disk);
	}
	while ((ep = g_mirror_event_first(sc)) != NULL) {
		g_mirror_event_remove(sc, ep);
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			ep->e_flags |= G_MIRROR_EVENT_DONE;
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
			mtx_lock(&sc->sc_events_mtx);
			wakeup(ep);
			mtx_unlock(&sc->sc_events_mtx);
		}
	}
	callout_drain(&sc->sc_callout);

	g_topology_lock();
	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
		g_mirror_disconnect_consumer(sc, cp);
	}
	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
	g_wither_geom(gp, ENXIO);
	sx_xunlock(&sc->sc_lock);
	if ((--sc->sc_refcnt) == 0)
		g_mirror_free_device(sc);
	g_topology_unlock();
}

static void
g_mirror_orphan(struct g_consumer *cp)
{
	struct g_mirror_disk *disk;

	g_topology_assert();

	disk = cp->private;
	if (disk == NULL)
		return;
	disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
	    G_MIRROR_EVENT_DONTWAIT);
}

/*
 * Return the next active disk on the list.
 * It is possible that it will be the same disk as the given one.
 * If there are no active disks on the list, NULL is returned.
 */
static __inline struct g_mirror_disk *
g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
{
	struct g_mirror_disk *dp;

	for (dp = LIST_NEXT(disk, d_next); dp != disk;
	    dp = LIST_NEXT(dp, d_next)) {
		if (dp == NULL)
			dp = LIST_FIRST(&sc->sc_disks);
		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
			break;
	}
	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
		return (NULL);
	return (dp);
}

static struct g_mirror_disk *
g_mirror_get_disk(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	if (sc->sc_hint == NULL) {
		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
		if (sc->sc_hint == NULL)
			return (NULL);
	}
	disk = sc->sc_hint;
	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
		disk = g_mirror_find_next(sc, disk);
		if (disk == NULL)
			return (NULL);
	}
	sc->sc_hint = g_mirror_find_next(sc, disk);
	return (disk);
}

static int
g_mirror_write_metadata(struct g_mirror_disk *disk,
    struct g_mirror_metadata *md)
{
	struct g_mirror_softc *sc;
	struct g_consumer *cp;
	off_t offset, length;
	u_char *sector;
	int error = 0;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	cp = disk->d_consumer;
	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	length = cp->provider->sectorsize;
	offset = cp->provider->mediasize - length;
	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
	if (md != NULL &&
	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0) {
		/*
		 * Handle the case when the size of the parent provider
		 * has been reduced.
		 */
		if (offset < md->md_mediasize)
			error = ENOSPC;
		else
			mirror_metadata_encode(md, sector);
	}
	KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_metadata_write, error);
	if (error == 0)
		error = g_write_data(cp, offset, sector, length);
	free(sector, M_MIRROR);
	if (error != 0) {
		if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
			disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
			G_MIRROR_DEBUG(0, "Cannot write metadata on %s "
			    "(device=%s, error=%d).",
			    g_mirror_get_diskname(disk), sc->sc_name, error);
		} else {
			G_MIRROR_DEBUG(1, "Cannot write metadata on %s "
			    "(device=%s, error=%d).",
			    g_mirror_get_diskname(disk), sc->sc_name, error);
		}
		if (g_mirror_disconnect_on_failure &&
		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
		}
	}
	return (error);
}

static int
g_mirror_clear_metadata(struct g_mirror_disk *disk)
{
	int error;

	g_topology_assert_not();
	sx_assert(&disk->d_softc->sc_lock, SX_LOCKED);

	if (disk->d_softc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
		return (0);
	error = g_mirror_write_metadata(disk, NULL);
	if (error == 0) {
		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
		    g_mirror_get_diskname(disk));
	} else {
		G_MIRROR_DEBUG(0,
		    "Cannot clear metadata on disk %s (error=%d).",
		    g_mirror_get_diskname(disk), error);
	}
	return (error);
}

void
g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
    struct g_mirror_metadata *md)
{

	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
	md->md_version = G_MIRROR_VERSION;
	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
	md->md_mid = sc->sc_id;
	md->md_all = sc->sc_ndisks;
	md->md_slice = sc->sc_slice;
	md->md_balance = sc->sc_balance;
	md->md_genid = sc->sc_genid;
	md->md_mediasize = sc->sc_mediasize;
	md->md_sectorsize = sc->sc_sectorsize;
	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
	bzero(md->md_provider, sizeof(md->md_provider));
	if (disk == NULL) {
		md->md_did = arc4random();
		md->md_priority = 0;
		md->md_syncid = 0;
		md->md_dflags = 0;
		md->md_sync_offset = 0;
		md->md_provsize = 0;
	} else {
		md->md_did = disk->d_id;
		md->md_priority = disk->d_priority;
		md->md_syncid = disk->d_sync.ds_syncid;
		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
			md->md_sync_offset = disk->d_sync.ds_offset_done;
		else
			md->md_sync_offset = 0;
		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
			strlcpy(md->md_provider,
			    disk->d_consumer->provider->name,
			    sizeof(md->md_provider));
		}
		md->md_provsize = disk->d_consumer->provider->mediasize;
	}
}

void
g_mirror_update_metadata(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_mirror_metadata md;
	int error;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
		return;
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0)
		g_mirror_fill_metadata(sc, disk, &md);
	error = g_mirror_write_metadata(disk, &md);
	if (error == 0) {
		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
		    g_mirror_get_diskname(disk));
	} else {
		G_MIRROR_DEBUG(0,
		    "Cannot update metadata on disk %s (error=%d).",
		    g_mirror_get_diskname(disk), error);
	}
}

static void
g_mirror_bump_syncid(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
	    ("%s called with no active disks (device=%s).", __func__,
	    sc->sc_name));

	sc->sc_syncid++;
	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
	    sc->sc_syncid);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_sync.ds_syncid = sc->sc_syncid;
			g_mirror_update_metadata(disk);
		}
	}
}

static void
g_mirror_bump_genid(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
	    ("%s called with no active disks (device=%s).", __func__,
	    sc->sc_name));

	sc->sc_genid++;
	G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name,
	    sc->sc_genid);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_genid = sc->sc_genid;
			g_mirror_update_metadata(disk);
		}
	}
}

static int
g_mirror_idle(struct g_mirror_softc *sc, int acw)
{
	struct g_mirror_disk *disk;
	int timeout;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if (sc->sc_provider == NULL)
		return (0);
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
		return (0);
	if (sc->sc_idle)
		return (0);
	if (sc->sc_writes > 0)
		return (0);
	if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) {
		timeout = g_mirror_idletime - (time_uptime - sc->sc_last_write);
		if (!g_mirror_shutdown && timeout > 0)
			return (timeout);
	}
	sc->sc_idle = 1;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as clean.",
		    g_mirror_get_diskname(disk), sc->sc_name);
		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
	}
	return (0);
}

static void
g_mirror_unidle(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
		return;
	sc->sc_idle = 0;
	sc->sc_last_write = time_uptime;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as dirty.",
		    g_mirror_get_diskname(disk), sc->sc_name);
		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
	}
}

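/*
 * Complete a cloned BIO_FLUSH request; once all children have returned,
 * deliver the parent request.
 */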
static void
g_mirror_flush_done(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct bio *pbp;

	pbp = bp->bio_parent;
	sc = pbp->bio_to->private;
	mtx_lock(&sc->sc_done_mtx);
	if (pbp->bio_error == 0)
		pbp->bio_error = bp->bio_error;
	pbp->bio_completed += bp->bio_completed;
	pbp->bio_inbed++;
	if (pbp->bio_children == pbp->bio_inbed) {
		mtx_unlock(&sc->sc_done_mtx);
		g_io_deliver(pbp, pbp->bio_error);
	} else
		mtx_unlock(&sc->sc_done_mtx);
	g_destroy_bio(bp);
}

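/*
 * Completion routine for regular requests: queue the bio for the worker
 * thread and wake it up.
 */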
static void
g_mirror_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_from->geom->softc;
	bp->bio_cflags = G_MIRROR_BIO_FLAG_REGULAR;
	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);
}

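/*
 * Handle completion of a regular request that was cloned to a component:
 * account for errors, possibly disconnect the failing disk, and deliver the
 * parent request once all children are in.
 */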
static void
g_mirror_regular_request(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct bio *pbp;

	g_topology_assert_not();

	pbp = bp->bio_parent;
	sc = pbp->bio_to->private;
	bp->bio_from->index--;
	if (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_DELETE)
		sc->sc_writes--;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
	}

	if (bp->bio_cmd == BIO_READ)
		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_read,
		    bp->bio_error);
	else if (bp->bio_cmd == BIO_WRITE)
		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_write,
		    bp->bio_error);

	pbp->bio_inbed++;
	KASSERT(pbp->bio_inbed <= pbp->bio_children,
	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
	    pbp->bio_children));
	if (bp->bio_error == 0 && pbp->bio_error == 0) {
		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
		g_destroy_bio(bp);
		if (pbp->bio_children == pbp->bio_inbed) {
			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
			pbp->bio_completed = pbp->bio_length;
			if (pbp->bio_cmd == BIO_WRITE ||
			    pbp->bio_cmd == BIO_DELETE) {
				TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue);
				/* Release delayed sync requests if possible. */
				g_mirror_sync_release(sc);
			}
			g_io_deliver(pbp, pbp->bio_error);
		}
		return;
	} else if (bp->bio_error != 0) {
		if (pbp->bio_error == 0)
			pbp->bio_error = bp->bio_error;
		if (disk != NULL) {
			if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
				disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
				G_MIRROR_LOGREQ(0, bp,
				    "Request failed (error=%d).",
				    bp->bio_error);
			} else {
				G_MIRROR_LOGREQ(1, bp,
				    "Request failed (error=%d).",
				    bp->bio_error);
			}
			if (g_mirror_disconnect_on_failure &&
			    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1)
			{
				if (bp->bio_error == ENXIO &&
				    bp->bio_cmd == BIO_READ)
					sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
				else if (bp->bio_error == ENXIO)
					sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID_NOW;
				else
					sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
				g_mirror_event_send(disk,
				    G_MIRROR_DISK_STATE_DISCONNECTED,
				    G_MIRROR_EVENT_DONTWAIT);
			}
		}
		switch (pbp->bio_cmd) {
		case BIO_DELETE:
		case BIO_WRITE:
			pbp->bio_inbed--;
			pbp->bio_children--;
			break;
		}
	}
	g_destroy_bio(bp);

	switch (pbp->bio_cmd) {
	case BIO_READ:
		if (pbp->bio_inbed < pbp->bio_children)
			break;
		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 1)
			g_io_deliver(pbp, pbp->bio_error);
		else {
			pbp->bio_error = 0;
			mtx_lock(&sc->sc_queue_mtx);
			TAILQ_INSERT_TAIL(&sc->sc_queue, pbp, bio_queue);
			mtx_unlock(&sc->sc_queue_mtx);
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
			wakeup(sc);
		}
		break;
	case BIO_DELETE:
	case BIO_WRITE:
		if (pbp->bio_children == 0) {
			/*
			 * All requests failed.
			 */
		} else if (pbp->bio_inbed < pbp->bio_children) {
			/* Do nothing. */
			break;
		} else if (pbp->bio_children == pbp->bio_inbed) {
			/* Some requests succeeded. */
			pbp->bio_error = 0;
			pbp->bio_completed = pbp->bio_length;
		}
		TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue);
		/* Release delayed sync requests if possible. */
		g_mirror_sync_release(sc);
		g_io_deliver(pbp, pbp->bio_error);
		break;
	default:
		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
		break;
	}
}

static void
g_mirror_sync_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
	sc = bp->bio_from->geom->softc;
	bp->bio_cflags = G_MIRROR_BIO_FLAG_SYNC;
	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);
}

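/*
 * Answer a GEOM::candelete query: report whether any component supports
 * BIO_DELETE.
 */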
static void
g_mirror_candelete(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	int *val;

	sc = bp->bio_to->private;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE)
			break;
	}
	val = (int *)bp->bio_data;
	*val = (disk != NULL);
	g_io_deliver(bp, 0);
}

static void
g_mirror_kernel_dump(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct bio *cbp;
	struct g_kerneldump *gkd;

	/*
	 * We configure dumping to the first component, because this component
	 * will be used for reading with the 'prefer' balance algorithm.
	 * If the component with the highest priority is currently disconnected
	 * we will not be able to read the dump after the reboot, even if that
	 * component is connected and synchronized later. Can we do something
	 * better?
	 */
	sc = bp->bio_to->private;
	disk = LIST_FIRST(&sc->sc_disks);

	gkd = (struct g_kerneldump *)bp->bio_data;
	if (gkd->length > bp->bio_to->mediasize)
		gkd->length = bp->bio_to->mediasize;
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		g_io_deliver(bp, ENOMEM);
		return;
	}
	cbp->bio_done = g_std_done;
	g_io_request(cbp, disk->d_consumer);
	G_MIRROR_DEBUG(1, "Kernel dump will go to %s.",
	    g_mirror_get_diskname(disk));
}

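/*
 * Fan a BIO_FLUSH request out to all active components.
 */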
static void
g_mirror_flush(struct g_mirror_softc *sc, struct bio *bp)
{
	struct bio_queue queue;
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	TAILQ_INIT(&queue);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		cbp = g_clone_bio(bp);
		if (cbp == NULL) {
			while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
				TAILQ_REMOVE(&queue, cbp, bio_queue);
				g_destroy_bio(cbp);
			}
			if (bp->bio_error == 0)
				bp->bio_error = ENOMEM;
			g_io_deliver(bp, bp->bio_error);
			return;
		}
		TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
		cbp->bio_done = g_mirror_flush_done;
		cbp->bio_caller1 = disk;
		cbp->bio_to = disk->d_consumer->provider;
	}
	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
		TAILQ_REMOVE(&queue, cbp, bio_queue);
		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
		disk = cbp->bio_caller1;
		cbp->bio_caller1 = NULL;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		g_io_request(cbp, disk->d_consumer);
	}
}

static void
g_mirror_start(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_to->private;
	/*
	 * If sc == NULL or there are no valid disks, the provider's error
	 * should be set and g_mirror_start() should not be called at all.
	 */
	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
	    ("Provider's error should be set (error=%d)(mirror=%s).",
	    bp->bio_to->error, bp->bio_to->name));
	G_MIRROR_LOGREQ(3, bp, "Request received.");

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		break;
	case BIO_FLUSH:
		g_mirror_flush(sc, bp);
		return;
	case BIO_GETATTR:
		if (!strcmp(bp->bio_attribute, "GEOM::candelete")) {
			g_mirror_candelete(bp);
			return;
		} else if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) {
			g_mirror_kernel_dump(bp);
			return;
		}
		/* FALLTHROUGH */
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		return;
	}
	mtx_lock(&sc->sc_queue_mtx);
	if (bp->bio_to->error != 0) {
		mtx_unlock(&sc->sc_queue_mtx);
		g_io_deliver(bp, bp->bio_to->error);
		return;
	}
	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
	mtx_unlock(&sc->sc_queue_mtx);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	wakeup(sc);
}

/*
 * Return TRUE if the given request is colliding with an in-progress
 * synchronization request.
 */
static bool
g_mirror_sync_collision(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct bio *sbp;
	off_t rstart, rend, sstart, send;
	u_int i;

	if (sc->sc_sync.ds_ndisks == 0)
		return (false);
	rstart = bp->bio_offset;
	rend = bp->bio_offset + bp->bio_length;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING)
			continue;
		for (i = 0; i < g_mirror_syncreqs; i++) {
			sbp = disk->d_sync.ds_bios[i];
			if (sbp == NULL)
				continue;
			sstart = sbp->bio_offset;
			send = sbp->bio_offset + sbp->bio_length;
			if (rend > sstart && rstart < send)
				return (true);
		}
	}
	return (false);
}

/*
 * Return TRUE if the given sync request is colliding with an in-progress
 * regular request.
 */
static bool
g_mirror_regular_collision(struct g_mirror_softc *sc, struct bio *sbp)
{
	off_t rstart, rend, sstart, send;
	struct bio *bp;

	if (sc->sc_sync.ds_ndisks == 0)
		return (false);
	sstart = sbp->bio_offset;
	send = sbp->bio_offset + sbp->bio_length;
	TAILQ_FOREACH(bp, &sc->sc_inflight, bio_queue) {
		rstart = bp->bio_offset;
		rend = bp->bio_offset + bp->bio_length;
		if (rend > sstart && rstart < send)
			return (true);
	}
	return (false);
}

/*
 * Put the request onto the delayed queue.
 */
static void
g_mirror_regular_delay(struct g_mirror_softc *sc, struct bio *bp)
{

	G_MIRROR_LOGREQ(2, bp, "Delaying request.");
	TAILQ_INSERT_HEAD(&sc->sc_regular_delayed, bp, bio_queue);
}

/*
 * Put the synchronization request onto the delayed queue.
 */
static void
g_mirror_sync_delay(struct g_mirror_softc *sc, struct bio *bp)
{

	G_MIRROR_LOGREQ(2, bp, "Delaying synchronization request.");
	TAILQ_INSERT_TAIL(&sc->sc_sync_delayed, bp, bio_queue);
}

/*
 * Release delayed regular requests that no longer collide with
 * synchronization requests.
 */
static void
g_mirror_regular_release(struct g_mirror_softc *sc)
{
	struct bio *bp, *bp2;

	TAILQ_FOREACH_SAFE(bp, &sc->sc_regular_delayed, bio_queue, bp2) {
		if (g_mirror_sync_collision(sc, bp))
			continue;
		TAILQ_REMOVE(&sc->sc_regular_delayed, bp, bio_queue);
		G_MIRROR_LOGREQ(2, bp, "Releasing delayed request (%p).", bp);
		mtx_lock(&sc->sc_queue_mtx);
		TAILQ_INSERT_HEAD(&sc->sc_queue, bp, bio_queue);
		mtx_unlock(&sc->sc_queue_mtx);
	}
}

/*
 * Release delayed synchronization requests that no longer collide with
 * regular requests.
 */
static void
g_mirror_sync_release(struct g_mirror_softc *sc)
{
	struct bio *bp, *bp2;

	TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed, bio_queue, bp2) {
		if (g_mirror_regular_collision(sc, bp))
			continue;
		TAILQ_REMOVE(&sc->sc_sync_delayed, bp, bio_queue);
		G_MIRROR_LOGREQ(2, bp,
		    "Releasing delayed synchronization request.");
		g_io_request(bp, bp->bio_from);
	}
}

/*
 * Free a synchronization request and clear its slot in the array.
 */
static void
g_mirror_sync_request_free(struct g_mirror_disk *disk, struct bio *bp)
{
	int idx;

	if (disk != NULL && disk->d_sync.ds_bios != NULL) {
		idx = (int)(uintptr_t)bp->bio_caller1;
		KASSERT(disk->d_sync.ds_bios[idx] == bp,
		    ("unexpected sync BIO at %p:%d", disk, idx));
		disk->d_sync.ds_bios[idx] = NULL;
	}
	free(bp->bio_data, M_MIRROR);
	g_destroy_bio(bp);
}

/*
 * Handle synchronization requests.
 * Every synchronization request is a two-step process: first, a READ request
 * is sent to the active provider, and then a WRITE request (with the data
 * just read) is sent to the provider being synchronized. When the WRITE is
 * finished, a new synchronization request is sent.
 */
static void
g_mirror_sync_request(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct g_mirror_disk_sync *sync;

	bp->bio_from->index--;
	sc = bp->bio_from->geom->softc;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
		g_mirror_sync_request_free(NULL, bp);
		sx_xlock(&sc->sc_lock);
		return;
	}

	/*
	 * Synchronization request.
	 */
	switch (bp->bio_cmd) {
	case BIO_READ:
	    {
		struct g_consumer *cp;

		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_read,
		    bp->bio_error);

		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_mirror_sync_request_free(disk, bp);
			return;
		}
		G_MIRROR_LOGREQ(3, bp,
		    "Synchronization request half-finished.");
		bp->bio_cmd = BIO_WRITE;
		bp->bio_cflags = 0;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		cp->index++;
		g_io_request(bp, cp);
		return;
	    }
	case BIO_WRITE:
	    {
		off_t offset;
		void *data;
		int i, idx;

		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_write,
		    bp->bio_error);

		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_mirror_sync_request_free(disk, bp);
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}
		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
		sync = &disk->d_sync;
		if (sync->ds_offset >= sc->sc_mediasize ||
		    sync->ds_consumer == NULL ||
		    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
			/* Don't send more synchronization requests. */
			sync->ds_inflight--;
			g_mirror_sync_request_free(disk, bp);
			if (sync->ds_inflight > 0)
				return;
			if (sync->ds_consumer == NULL ||
			    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
				return;
			}
			/* Disk up-to-date, activate it. */
			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}

		/* Send next synchronization request. */
		data = bp->bio_data;
		idx = (int)(uintptr_t)bp->bio_caller1;
		g_reset_bio(bp);
		bp->bio_cmd = BIO_READ;
		bp->bio_offset = sync->ds_offset;
		bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
		sync->ds_offset += bp->bio_length;
		bp->bio_done = g_mirror_sync_done;
		bp->bio_data = data;
		bp->bio_from = sync->ds_consumer;
		bp->bio_to = sc->sc_provider;
		bp->bio_caller1 = (void *)(uintptr_t)idx;
		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
		sync->ds_consumer->index++;
		/*
		 * Delay the request if it is colliding with a regular request.
		 */
		if (g_mirror_regular_collision(sc, bp))
			g_mirror_sync_delay(sc, bp);
		else
			g_io_request(bp, sync->ds_consumer);

		/* Release delayed requests if possible. */
		g_mirror_regular_release(sc);

		/* Find the smallest offset. */
		offset = sc->sc_mediasize;
		for (i = 0; i < g_mirror_syncreqs; i++) {
			bp = sync->ds_bios[i];
			if (bp != NULL && bp->bio_offset < offset)
				offset = bp->bio_offset;
		}
		if (g_mirror_sync_period > 0 &&
		    time_uptime - sync->ds_update_ts > g_mirror_sync_period) {
			sync->ds_offset_done = offset;
			g_mirror_update_metadata(disk);
			sync->ds_update_ts = time_uptime;
		}
		return;
	    }
	default:
		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
		    bp->bio_cmd, sc->sc_name));
		break;
	}
}

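/*
 * PREFER balance algorithm: send the read to the first active disk on the
 * list, so the preferred component services all reads.
 */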
static void
g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
			break;
	}
	if (disk == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENXIO;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	g_io_request(cbp, cp);
}

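/*
 * ROUND-ROBIN balance algorithm: rotate reads across the active disks using
 * the sc_hint cursor.
 */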
static void
g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	disk = g_mirror_get_disk(sc);
	if (disk == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENXIO;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	g_io_request(cbp, cp);
}

#define TRACK_SIZE  (1 * 1024 * 1024)
#define LOAD_SCALE	256
#define ABS(x)		(((x) >= 0) ? (x) : (-(x)))

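/*
 * LOAD balance algorithm: pick the active disk with the lowest
 * exponentially averaged load, strongly preferring a disk whose head is
 * already at or near the requested offset.
 */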
static void
g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk, *dp;
	struct g_consumer *cp;
	struct bio *cbp;
	int prio, best;

	/* Find a disk with the smallest load. */
	disk = NULL;
	best = INT_MAX;
	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		prio = dp->load;
		/* If disk head is precisely in position - highly prefer it. */
		if (dp->d_last_offset == bp->bio_offset)
			prio -= 2 * LOAD_SCALE;
		else
		/* If disk head is close to position - prefer it. */
		if (ABS(dp->d_last_offset - bp->bio_offset) < TRACK_SIZE)
			prio -= 1 * LOAD_SCALE;
		if (prio <= best) {
			disk = dp;
			best = prio;
		}
	}
	KASSERT(disk != NULL, ("NULL disk for %s.", sc->sc_name));
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	/* Remember last head position. */
	disk->d_last_offset = bp->bio_offset + bp->bio_length;
	/* Update loads. */
	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
		dp->load = (dp->d_consumer->index * LOAD_SCALE +
		    dp->load * 7) / 8;
	}
	g_io_request(cbp, cp);
}

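/*
 * SPLIT balance algorithm: divide a sufficiently large read into
 * sector-aligned slices and send one slice to each active disk; small reads
 * fall back to round-robin.
 */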
static void
g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
{
	struct bio_queue queue;
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;
	off_t left, mod, offset, slice;
	u_char *data;
	u_int ndisks;

	if (bp->bio_length <= sc->sc_slice) {
		g_mirror_request_round_robin(sc, bp);
		return;
	}
	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
	slice = bp->bio_length / ndisks;
	mod = slice % sc->sc_provider->sectorsize;
	if (mod != 0)
		slice += sc->sc_provider->sectorsize - mod;
	/*
	 * Allocate all bios before sending any request, so we can
	 * return ENOMEM in a nice and clean way.
	 */
	left = bp->bio_length;
	offset = bp->bio_offset;
	data = bp->bio_data;
	TAILQ_INIT(&queue);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		cbp = g_clone_bio(bp);
		if (cbp == NULL) {
			while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
				TAILQ_REMOVE(&queue, cbp, bio_queue);
				g_destroy_bio(cbp);
			}
			if (bp->bio_error == 0)
				bp->bio_error = ENOMEM;
			g_io_deliver(bp, bp->bio_error);
			return;
		}
		TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
		cbp->bio_done = g_mirror_done;
		cbp->bio_caller1 = disk;
		cbp->bio_to = disk->d_consumer->provider;
		cbp->bio_offset = offset;
		cbp->bio_data = data;
		cbp->bio_length = MIN(left, slice);
		left -= cbp->bio_length;
		if (left == 0)
			break;
		offset += cbp->bio_length;
		data += cbp->bio_length;
	}
	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
		TAILQ_REMOVE(&queue, cbp, bio_queue);
		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
		disk = cbp->bio_caller1;
		cbp->bio_caller1 = NULL;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		disk->d_consumer->index++;
		g_io_request(cbp, disk->d_consumer);
	}
}

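/*
 * Dispatch a regular request: reads go to the configured balance algorithm,
 * while writes and deletes are cloned to every component that needs them.
 */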
1684
1685static void
1686g_mirror_register_request(struct bio *bp)
1687{
1688	struct g_mirror_softc *sc;
1689
1690	sc = bp->bio_to->private;
1691	switch (bp->bio_cmd) {
1692	case BIO_READ:
1693		switch (sc->sc_balance) {
1694		case G_MIRROR_BALANCE_LOAD:
1695			g_mirror_request_load(sc, bp);
1696			break;
1697		case G_MIRROR_BALANCE_PREFER:
1698			g_mirror_request_prefer(sc, bp);
1699			break;
1700		case G_MIRROR_BALANCE_ROUND_ROBIN:
1701			g_mirror_request_round_robin(sc, bp);
1702			break;
1703		case G_MIRROR_BALANCE_SPLIT:
1704			g_mirror_request_split(sc, bp);
1705			break;
1706		}
1707		return;
1708	case BIO_WRITE:
1709	case BIO_DELETE:
1710	    {
1711		struct bio_queue queue;
1712		struct g_mirror_disk *disk;
1713		struct g_mirror_disk_sync *sync;
1714		struct g_consumer *cp;
1715		struct bio *cbp;
1716
1717		/*
1718		 * Delay the request if it is colliding with a synchronization
1719		 * request.
1720		 */
1721		if (g_mirror_sync_collision(sc, bp)) {
1722			g_mirror_regular_delay(sc, bp);
1723			return;
1724		}
1725
1726		if (sc->sc_idle)
1727			g_mirror_unidle(sc);
1728		else
1729			sc->sc_last_write = time_uptime;
1730
1731		/*
1732		 * Bump syncid on first write.
1733		 */
1734		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) {
1735			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
1736			g_mirror_bump_syncid(sc);
1737		}
1738
		/*
		 * Allocate all bios before sending any request, so we can
		 * return ENOMEM in a nice and clean way.
		 */
		TAILQ_INIT(&queue);
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			sync = &disk->d_sync;
			switch (disk->d_state) {
			case G_MIRROR_DISK_STATE_ACTIVE:
				break;
			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
				if (bp->bio_offset >= sync->ds_offset)
					continue;
				break;
			default:
				continue;
			}
			if (bp->bio_cmd == BIO_DELETE &&
			    (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE) == 0)
				continue;
			cbp = g_clone_bio(bp);
			if (cbp == NULL) {
				while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
					TAILQ_REMOVE(&queue, cbp, bio_queue);
					g_destroy_bio(cbp);
				}
				if (bp->bio_error == 0)
					bp->bio_error = ENOMEM;
				g_io_deliver(bp, bp->bio_error);
				return;
			}
			TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
			cbp->bio_done = g_mirror_done;
			cp = disk->d_consumer;
			cbp->bio_caller1 = cp;
			cbp->bio_to = cp->provider;
			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
			    ("Consumer %s not opened (r%dw%de%d).",
			    cp->provider->name, cp->acr, cp->acw, cp->ace));
		}
		if (TAILQ_EMPTY(&queue)) {
			g_io_deliver(bp, EOPNOTSUPP);
			return;
		}
		while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
			TAILQ_REMOVE(&queue, cbp, bio_queue);
			cp = cbp->bio_caller1;
			cbp->bio_caller1 = NULL;
			cp->index++;
			sc->sc_writes++;
			g_io_request(cbp, cp);
		}
		/*
		 * Put the request onto the inflight queue, so we can check
		 * that new synchronization requests don't collide with it.
		 */
		TAILQ_INSERT_TAIL(&sc->sc_inflight, bp, bio_queue);
		return;
	    }
	default:
		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
		    bp->bio_cmd, sc->sc_name));
		break;
	}
}

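/*
 * Check whether the device can be destroyed: tasting must not be in
 * progress and no consumer on either geom may be busy.
 */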
static int
g_mirror_can_destroy(struct g_mirror_softc *sc)
{
	struct g_geom *gp;
	struct g_consumer *cp;

	g_topology_assert();
	gp = sc->sc_geom;
	if (gp->softc == NULL)
		return (1);
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_TASTING) != 0)
		return (0);
	LIST_FOREACH(cp, &gp->consumer, consumer) {
		if (g_mirror_is_busy(sc, cp))
			return (0);
	}
	gp = sc->sc_sync.ds_geom;
	LIST_FOREACH(cp, &gp->consumer, consumer) {
		if (g_mirror_is_busy(sc, cp))
			return (0);
	}
	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
	    sc->sc_name);
	return (1);
}

static int
g_mirror_try_destroy(struct g_mirror_softc *sc)
{

	if (sc->sc_rootmount != NULL) {
		G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
		    sc->sc_rootmount);
		root_mount_rel(sc->sc_rootmount);
		sc->sc_rootmount = NULL;
	}
	g_topology_lock();
	if (!g_mirror_can_destroy(sc)) {
		g_topology_unlock();
		return (0);
	}
	sc->sc_geom->softc = NULL;
	sc->sc_sync.ds_geom->softc = NULL;
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DRAIN) != 0) {
		g_topology_unlock();
		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
		    &sc->sc_worker);
		/* Unlock sc_lock here, as it can be destroyed after wakeup. */
		sx_xunlock(&sc->sc_lock);
		wakeup(&sc->sc_worker);
		sc->sc_worker = NULL;
	} else {
		g_topology_unlock();
		g_mirror_destroy_device(sc);
	}
	return (1);
}

/*
 * Worker thread.
 */
static void
g_mirror_worker(void *arg)
{
	struct g_mirror_softc *sc;
	struct g_mirror_event *ep;
	struct bio *bp;
	int timeout;

	sc = arg;
	thread_lock(curthread);
	sched_prio(curthread, PRIBIO);
	thread_unlock(curthread);

	sx_xlock(&sc->sc_lock);
	for (;;) {
		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
		/*
		 * First, take a look at events.
		 * It is important to handle events before any I/O requests.
		 */
		ep = g_mirror_event_first(sc);
		if (ep != NULL) {
			g_mirror_event_remove(sc, ep);
			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
				/* Update only device status. */
				G_MIRROR_DEBUG(3,
				    "Running event for device %s.",
				    sc->sc_name);
				ep->e_error = 0;
				g_mirror_update_device(sc, true);
			} else {
				/* Update disk status. */
				G_MIRROR_DEBUG(3, "Running event for disk %s.",
				     g_mirror_get_diskname(ep->e_disk));
				ep->e_error = g_mirror_update_disk(ep->e_disk,
				    ep->e_state);
				if (ep->e_error == 0)
					g_mirror_update_device(sc, false);
			}
			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
				KASSERT(ep->e_error == 0,
				    ("Error cannot be handled."));
				g_mirror_event_free(ep);
			} else {
				ep->e_flags |= G_MIRROR_EVENT_DONE;
				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
				    ep);
				mtx_lock(&sc->sc_events_mtx);
				wakeup(ep);
				mtx_unlock(&sc->sc_events_mtx);
			}
			if ((sc->sc_flags &
			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
				if (g_mirror_try_destroy(sc)) {
					curthread->td_pflags &= ~TDP_GEOM;
					G_MIRROR_DEBUG(1, "Thread exiting.");
					kproc_exit(0);
				}
			}
			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
			continue;
		}
1929		/*
1930		 * Check if we can mark the array as CLEAN and, if we cannot,
1931		 * determine how many seconds we should wait.
1932		 */
1933		timeout = g_mirror_idle(sc, -1);
1934		/*
1935		 * Now I/O requests.
1936		 */
1937		/* Get first request from the queue. */
1938		mtx_lock(&sc->sc_queue_mtx);
1939		bp = TAILQ_FIRST(&sc->sc_queue);
1940		if (bp != NULL)
1941			TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue);
1942		else {
1943			if ((sc->sc_flags &
1944			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1945				mtx_unlock(&sc->sc_queue_mtx);
1946				if (g_mirror_try_destroy(sc)) {
1947					curthread->td_pflags &= ~TDP_GEOM;
1948					G_MIRROR_DEBUG(1, "Thread exiting.");
1949					kproc_exit(0);
1950				}
1951				mtx_lock(&sc->sc_queue_mtx);
1952				if (!TAILQ_EMPTY(&sc->sc_queue)) {
1953					mtx_unlock(&sc->sc_queue_mtx);
1954					continue;
1955				}
1956			}
1957			if (g_mirror_event_first(sc) != NULL) {
1958				mtx_unlock(&sc->sc_queue_mtx);
1959				continue;
1960			}
1961			sx_xunlock(&sc->sc_lock);
1962			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1",
1963			    timeout * hz);
1964			sx_xlock(&sc->sc_lock);
1965			G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
1966			continue;
1967		}
1968		mtx_unlock(&sc->sc_queue_mtx);
1969
1970		if (bp->bio_from->geom == sc->sc_sync.ds_geom &&
1971		    (bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
1972			g_mirror_sync_request(bp);	/* READ */
1973		} else if (bp->bio_to != sc->sc_provider) {
1974			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0)
1975				g_mirror_regular_request(bp);
1976			else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
1977				g_mirror_sync_request(bp);	/* WRITE */
1978			else {
1979				KASSERT(0,
1980				    ("Invalid request cflags=0x%hx to=%s.",
1981				    bp->bio_cflags, bp->bio_to->name));
1982			}
1983		} else {
1984			g_mirror_register_request(bp);
1985		}
1986		G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__);
1987	}
1988}
1989
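/*
 * Keep the disk's DIRTY flag in sync with the device idle state: mark the
 * disk dirty while writes are in progress and clean once the device goes
 * idle.  A no-op when NOFAILSYNC is set.
 */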
1990static void
1991g_mirror_update_idle(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
1992{
1993
1994	sx_assert(&sc->sc_lock, SX_LOCKED);
1995
1996	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
1997		return;
1998	if (!sc->sc_idle && (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
1999		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as dirty.",
2000		    g_mirror_get_diskname(disk), sc->sc_name);
2001		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2002	} else if (sc->sc_idle &&
2003	    (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2004		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as clean.",
2005		    g_mirror_get_diskname(disk), sc->sc_name);
2006		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2007	}
2008}
2009
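/*
 * Start synchronizing a disk: attach a consumer from the synchronization
 * geom to the mirror provider, allocate g_mirror_syncreqs read BIOs and
 * fire them off, delaying any that collide with in-flight regular writes.
 */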
2010static void
2011g_mirror_sync_start(struct g_mirror_disk *disk)
2012{
2013	struct g_mirror_softc *sc;
2014	struct g_consumer *cp;
2015	struct bio *bp;
2016	int error, i;
2017
2018	g_topology_assert_not();
2019	sc = disk->d_softc;
2020	sx_assert(&sc->sc_lock, SX_LOCKED);
2021
2022	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2023	    ("Disk %s is not marked for synchronization.",
2024	    g_mirror_get_diskname(disk)));
2025	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2026	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
2027	    sc->sc_state));
2028
2029	sx_xunlock(&sc->sc_lock);
2030	g_topology_lock();
2031	cp = g_new_consumer(sc->sc_sync.ds_geom);
2032	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
2033	error = g_attach(cp, sc->sc_provider);
2034	KASSERT(error == 0,
2035	    ("Cannot attach to %s (error=%d).", sc->sc_name, error));
2036	error = g_access(cp, 1, 0, 0);
2037	KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error));
2038	g_topology_unlock();
2039	sx_xlock(&sc->sc_lock);
2040
2041	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
2042	    g_mirror_get_diskname(disk));
2043	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) == 0)
2044		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2045	KASSERT(disk->d_sync.ds_consumer == NULL,
2046	    ("Sync consumer already exists (device=%s, disk=%s).",
2047	    sc->sc_name, g_mirror_get_diskname(disk)));
2048
2049	disk->d_sync.ds_consumer = cp;
2050	disk->d_sync.ds_consumer->private = disk;
2051	disk->d_sync.ds_consumer->index = 0;
2052
2053	/*
2054	 * Allocate memory for synchronization bios and initialize them.
2055	 */
2056	disk->d_sync.ds_bios = malloc(sizeof(struct bio *) * g_mirror_syncreqs,
2057	    M_MIRROR, M_WAITOK);
2058	for (i = 0; i < g_mirror_syncreqs; i++) {
2059		bp = g_alloc_bio();
2060		disk->d_sync.ds_bios[i] = bp;
2061		bp->bio_parent = NULL;
2062		bp->bio_cmd = BIO_READ;
2063		bp->bio_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK);
2064		bp->bio_cflags = 0;
2065		bp->bio_offset = disk->d_sync.ds_offset;
2066		bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
2067		disk->d_sync.ds_offset += bp->bio_length;
2068		bp->bio_done = g_mirror_sync_done;
2069		bp->bio_from = disk->d_sync.ds_consumer;
2070		bp->bio_to = sc->sc_provider;
2071		bp->bio_caller1 = (void *)(uintptr_t)i;
2072	}
2073
2074	/* Increase the number of disks in SYNCHRONIZING state. */
2075	sc->sc_sync.ds_ndisks++;
2076	/* Set the number of in-flight synchronization requests. */
2077	disk->d_sync.ds_inflight = g_mirror_syncreqs;
2078
2079	/*
2080	 * Fire off first synchronization requests.
2081	 */
2082	for (i = 0; i < g_mirror_syncreqs; i++) {
2083		bp = disk->d_sync.ds_bios[i];
2084		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
2085		disk->d_sync.ds_consumer->index++;
2086		/*
2087		 * Delay the request if it is colliding with a regular request.
2088		 */
2089		if (g_mirror_regular_collision(sc, bp))
2090			g_mirror_sync_delay(sc, bp);
2091		else
2092			g_io_request(bp, disk->d_sync.ds_consumer);
2093	}
2094}
2095
2096/*
2097 * Stop synchronization process.
2098 * type: 0 - synchronization finished
2099 *       1 - synchronization stopped
2100 */
2101static void
2102g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
2103{
2104	struct g_mirror_softc *sc;
2105	struct g_consumer *cp;
2106
2107	g_topology_assert_not();
2108	sc = disk->d_softc;
2109	sx_assert(&sc->sc_lock, SX_LOCKED);
2110
2111	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2112	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2113	    g_mirror_disk_state2str(disk->d_state)));
2114	if (disk->d_sync.ds_consumer == NULL)
2115		return;
2116
2117	if (type == 0) {
2118		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
2119		    sc->sc_name, g_mirror_get_diskname(disk));
2120	} else /* if (type == 1) */ {
2121		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
2122		    sc->sc_name, g_mirror_get_diskname(disk));
2123	}
2124	g_mirror_regular_release(sc);
2125	free(disk->d_sync.ds_bios, M_MIRROR);
2126	disk->d_sync.ds_bios = NULL;
2127	cp = disk->d_sync.ds_consumer;
2128	disk->d_sync.ds_consumer = NULL;
2129	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2130	sc->sc_sync.ds_ndisks--;
2131	sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
2132	g_topology_lock();
2133	g_mirror_kill_consumer(sc, cp);
2134	g_topology_unlock();
2135	sx_xlock(&sc->sc_lock);
2136}
2137
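/*
 * Create and announce the mirror/<name> provider: inherit the largest
 * stripe size of the components, decide whether unmapped BIOs can be
 * accepted, and start synchronization for disks that need it.
 */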
2138static void
2139g_mirror_launch_provider(struct g_mirror_softc *sc)
2140{
2141	struct g_mirror_disk *disk;
2142	struct g_provider *pp, *dp;
2143
2144	sx_assert(&sc->sc_lock, SX_LOCKED);
2145
2146	g_topology_lock();
2147	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
2148	pp->flags |= G_PF_DIRECT_RECEIVE;
2149	pp->mediasize = sc->sc_mediasize;
2150	pp->sectorsize = sc->sc_sectorsize;
2151	pp->stripesize = 0;
2152	pp->stripeoffset = 0;
2153
2154	/* Splitting of unmapped BIOs could work but isn't implemented now. */
2155	if (sc->sc_balance != G_MIRROR_BALANCE_SPLIT)
2156		pp->flags |= G_PF_ACCEPT_UNMAPPED;
2157
2158	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2159		if (disk->d_consumer && disk->d_consumer->provider) {
2160			dp = disk->d_consumer->provider;
2161			if (dp->stripesize > pp->stripesize) {
2162				pp->stripesize = dp->stripesize;
2163				pp->stripeoffset = dp->stripeoffset;
2164			}
2165			/* A provider underneath us doesn't support unmapped I/O. */
2166			if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) {
2167				G_MIRROR_DEBUG(0, "Cancelling unmapped "
2168				    "because of %s.", dp->name);
2169				pp->flags &= ~G_PF_ACCEPT_UNMAPPED;
2170			}
2171		}
2172	}
2173	pp->private = sc;
2174	sc->sc_refcnt++;
2175	sc->sc_provider = pp;
2176	g_error_provider(pp, 0);
2177	g_topology_unlock();
2178	G_MIRROR_DEBUG(0, "Device %s launched (%u/%u).", pp->name,
2179	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE), sc->sc_ndisks);
2180	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2181		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2182			g_mirror_sync_start(disk);
2183	}
2184}
2185
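/*
 * Tear down the provider: stop active synchronizations, fail queued
 * requests from our consumers with ENXIO, destroy internally generated
 * BIOs and wither the provider.
 */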
2186static void
2187g_mirror_destroy_provider(struct g_mirror_softc *sc)
2188{
2189	struct g_mirror_disk *disk;
2190	struct bio *bp;
2191
2192	g_topology_assert_not();
2193	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
2194	    sc->sc_name));
2195
2196	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2197		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2198			g_mirror_sync_stop(disk, 1);
2199	}
2200
2201	g_topology_lock();
2202	g_error_provider(sc->sc_provider, ENXIO);
2203	mtx_lock(&sc->sc_queue_mtx);
2204	while ((bp = TAILQ_FIRST(&sc->sc_queue)) != NULL) {
2205		TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue);
2206		/*
2207		 * Abort any pending I/O that wasn't generated by us.
2208		 * Synchronization requests and requests destined for individual
2209		 * mirror components can be destroyed immediately.
2210		 */
2211		if (bp->bio_to == sc->sc_provider &&
2212		    bp->bio_from->geom != sc->sc_sync.ds_geom) {
2213			g_io_deliver(bp, ENXIO);
2214		} else {
2215			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
2216				free(bp->bio_data, M_MIRROR);
2217			g_destroy_bio(bp);
2218		}
2219	}
2220	mtx_unlock(&sc->sc_queue_mtx);
2221	g_wither_provider(sc->sc_provider, ENXIO);
2222	sc->sc_provider = NULL;
2223	G_MIRROR_DEBUG(0, "Device %s: provider destroyed.", sc->sc_name);
2224	g_topology_unlock();
2225}
2226
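/*
 * Callout handler: force the device to start after the timeout expires,
 * even if not all components have appeared.
 */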
2227static void
2228g_mirror_go(void *arg)
2229{
2230	struct g_mirror_softc *sc;
2231
2232	sc = arg;
2233	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
2234	g_mirror_event_send(sc, 0,
2235	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
2236}
2237
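/*
 * Choose the state of a disk joining the device by comparing its syncid
 * with the device's: an equal syncid yields ACTIVE (or SYNCHRONIZING/STALE
 * when dirty), an older one forces resynchronization from offset zero, and
 * a newer one means the mirror was started from stale components, so the
 * disk is destroyed rather than connected.
 */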
2238static u_int
2239g_mirror_determine_state(struct g_mirror_disk *disk)
2240{
2241	struct g_mirror_softc *sc;
2242	u_int state;
2243
2244	sc = disk->d_softc;
2245	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
2246		if ((disk->d_flags &
2247		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0 &&
2248		    (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 ||
2249		     (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0)) {
2250			/* Disk does not need synchronization. */
2251			state = G_MIRROR_DISK_STATE_ACTIVE;
2252		} else {
2253			if ((sc->sc_flags &
2254			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2255			    (disk->d_flags &
2256			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2257				/*
2258				 * We can start synchronization from
2259				 * the stored offset.
2260				 */
2261				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2262			} else {
2263				state = G_MIRROR_DISK_STATE_STALE;
2264			}
2265		}
2266	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
2267		/*
2268		 * Reset all synchronization data for this disk,
2269		 * because even if it was synchronized, it was
2270		 * synchronized against disks with a different syncid.
2271		 */
2272		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2273		disk->d_sync.ds_offset = 0;
2274		disk->d_sync.ds_offset_done = 0;
2275		disk->d_sync.ds_syncid = sc->sc_syncid;
2276		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2277		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2278			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2279		} else {
2280			state = G_MIRROR_DISK_STATE_STALE;
2281		}
2282	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
2283		/*
2284		 * Not good, NOT GOOD!
2285		 * It means that the mirror was started on stale disks
2286		 * and a fresher disk has just arrived.
2287		 * If there were writes, the mirror is broken, sorry.
2288		 * The best choice here is not to touch this disk and to
2289		 * inform the user loudly.
2290		 */
2291		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
2292		    "disk (%s) arrived! It will not be connected to the "
2293		    "running device.", sc->sc_name,
2294		    g_mirror_get_diskname(disk));
2295		g_mirror_destroy_disk(disk);
2296		state = G_MIRROR_DISK_STATE_NONE;
2297		/* Return immediately, because disk was destroyed. */
2298		return (state);
2299	}
2300	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
2301	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
2302	return (state);
2303}
2304
2305/*
2306 * Update device state.
2307 */
2308static void
2309g_mirror_update_device(struct g_mirror_softc *sc, bool force)
2310{
2311	struct g_mirror_disk *disk;
2312	u_int state;
2313
2314	sx_assert(&sc->sc_lock, SX_XLOCKED);
2315
2316	switch (sc->sc_state) {
2317	case G_MIRROR_DEVICE_STATE_STARTING:
2318	    {
2319		struct g_mirror_disk *pdisk, *tdisk;
2320		u_int dirty, ndisks, genid, syncid;
2321		bool broken;
2322
2323		KASSERT(sc->sc_provider == NULL,
2324		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
2325		/*
2326		 * Are we ready? We are, if all disks are connected or
2327		 * if we have any disks and 'force' is true.
2328		 */
2329		ndisks = g_mirror_ndisks(sc, -1);
2330		if (sc->sc_ndisks == ndisks || (force && ndisks > 0)) {
2331			;
2332		} else if (ndisks == 0) {
2333			/*
2334			 * Disks went down in starting phase, so destroy
2335			 * device.
2336			 */
2337			callout_drain(&sc->sc_callout);
2338			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2339			G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
2340			    sc->sc_rootmount);
2341			root_mount_rel(sc->sc_rootmount);
2342			sc->sc_rootmount = NULL;
2343			return;
2344		} else {
2345			return;
2346		}
2347
2348		/*
2349		 * Activate all disks with the biggest syncid.
2350		 */
2351		if (force) {
2352			/*
2353			 * If 'force' is true, we have been called due to a
2354			 * timeout, so don't bother canceling the timeout.
2355			 */
2356			ndisks = 0;
2357			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2358				if ((disk->d_flags &
2359				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
2360					ndisks++;
2361				}
2362			}
2363			if (ndisks == 0) {
2364				/* No valid disks found, destroy device. */
2365				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2366				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2367				    __LINE__, sc->sc_rootmount);
2368				root_mount_rel(sc->sc_rootmount);
2369				sc->sc_rootmount = NULL;
2370				return;
2371			}
2372		} else {
2373			/* Cancel timeout. */
2374			callout_drain(&sc->sc_callout);
2375		}
2376
2377		/*
2378		 * Find the biggest genid.
2379		 */
2380		genid = 0;
2381		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2382			if (disk->d_genid > genid)
2383				genid = disk->d_genid;
2384		}
2385		sc->sc_genid = genid;
2386		/*
2387		 * Remove all disks without the biggest genid.
2388		 */
2389		broken = false;
2390		LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
2391			if (disk->d_genid < genid) {
2392				G_MIRROR_DEBUG(0,
2393				    "Component %s (device %s) broken, skipping.",
2394				    g_mirror_get_diskname(disk), sc->sc_name);
2395				g_mirror_destroy_disk(disk);
2396				/*
2397				 * Bump the syncid in case we discover a healthy
2398				 * replacement disk after starting the mirror.
2399				 */
2400				broken = true;
2401			}
2402		}
2403
2404		/*
2405		 * Find the biggest syncid.
2406		 */
2407		syncid = 0;
2408		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2409			if (disk->d_sync.ds_syncid > syncid)
2410				syncid = disk->d_sync.ds_syncid;
2411		}
2412
2413		/*
2414		 * Now look for dirty disks: if all disks with the biggest
2415		 * syncid are dirty, we have to choose the one with the
2416		 * biggest priority and rebuild the rest.  To that end, count
2417		 * the dirty disks and all disks with the biggest syncid and,
2418		 * while here, find the disk with the biggest priority.
2419		 */
2423		dirty = ndisks = 0;
2424		pdisk = NULL;
2425		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2426			if (disk->d_sync.ds_syncid != syncid)
2427				continue;
2428			if ((disk->d_flags &
2429			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2430				continue;
2431			}
2432			ndisks++;
2433			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2434				dirty++;
2435				if (pdisk == NULL ||
2436				    pdisk->d_priority < disk->d_priority) {
2437					pdisk = disk;
2438				}
2439			}
2440		}
2441		if (dirty == 0) {
2442			/* No dirty disks at all, great. */
2443		} else if (dirty == ndisks) {
2444			/*
2445			 * Force synchronization for all dirty disks except one
2446			 * with the biggest priority.
2447			 */
2448			KASSERT(pdisk != NULL, ("pdisk == NULL"));
2449			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
2450			    "master disk for synchronization.",
2451			    g_mirror_get_diskname(pdisk), sc->sc_name);
2452			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2453				if (disk->d_sync.ds_syncid != syncid)
2454					continue;
2455				if ((disk->d_flags &
2456				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2457					continue;
2458				}
2459				KASSERT((disk->d_flags &
2460				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
2461				    ("Disk %s isn't marked as dirty.",
2462				    g_mirror_get_diskname(disk)));
2463				/* Skip the disk with the biggest priority. */
2464				if (disk == pdisk)
2465					continue;
2466				disk->d_sync.ds_syncid = 0;
2467			}
2468		} else if (dirty < ndisks) {
2469			/*
2470			 * Force synchronization for all dirty disks.
2471			 * We have some non-dirty disks.
2472			 */
2473			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2474				if (disk->d_sync.ds_syncid != syncid)
2475					continue;
2476				if ((disk->d_flags &
2477				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2478					continue;
2479				}
2480				if ((disk->d_flags &
2481				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
2482					continue;
2483				}
2484				disk->d_sync.ds_syncid = 0;
2485			}
2486		}
2487
2488		/* Reset hint. */
2489		sc->sc_hint = NULL;
2490		sc->sc_syncid = syncid;
2491		if (force || broken) {
2492			/* Remember to bump syncid on first write. */
2493			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2494		}
2495		state = G_MIRROR_DEVICE_STATE_RUNNING;
2496		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
2497		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
2498		    g_mirror_device_state2str(state));
2499		sc->sc_state = state;
2500		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2501			state = g_mirror_determine_state(disk);
2502			g_mirror_event_send(disk, state,
2503			    G_MIRROR_EVENT_DONTWAIT);
2504			if (state == G_MIRROR_DISK_STATE_STALE)
2505				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2506		}
2507		break;
2508	    }
2509	case G_MIRROR_DEVICE_STATE_RUNNING:
2510		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
2511		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2512			/*
2513			 * No usable disks, so destroy the device.
2514			 */
2515			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2516			break;
2517		} else if (g_mirror_ndisks(sc,
2518		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
2519		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2520			/*
2521			 * We have active disks, launch provider if it doesn't
2522			 * exist.
2523			 */
2524			if (sc->sc_provider == NULL)
2525				g_mirror_launch_provider(sc);
2526			if (sc->sc_rootmount != NULL) {
2527				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2528				    __LINE__, sc->sc_rootmount);
2529				root_mount_rel(sc->sc_rootmount);
2530				sc->sc_rootmount = NULL;
2531			}
2532		}
2533		/*
2534		 * Genid should be bumped immediately, so do it here.
2535		 */
2536		if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID) != 0) {
2537			sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID;
2538			g_mirror_bump_genid(sc);
2539		}
2540		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID_NOW) != 0) {
2541			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID_NOW;
2542			g_mirror_bump_syncid(sc);
2543		}
2544		break;
2545	default:
2546		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
2547		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
2548		break;
2549	}
2550}
2551
2552/*
2553 * Update disk state and device state if needed.
2554 */
2555#define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
2556	"Disk %s state changed from %s to %s (device %s).",		\
2557	g_mirror_get_diskname(disk),					\
2558	g_mirror_disk_state2str(disk->d_state),				\
2559	g_mirror_disk_state2str(state), sc->sc_name)
2560static int
2561g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
2562{
2563	struct g_mirror_softc *sc;
2564
2565	sc = disk->d_softc;
2566	sx_assert(&sc->sc_lock, SX_XLOCKED);
2567
2568again:
2569	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
2570	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
2571	    g_mirror_disk_state2str(state));
2572	switch (state) {
2573	case G_MIRROR_DISK_STATE_NEW:
2574		/*
2575		 * Possible scenarios:
2576		 * 1. A new disk arrives.
2577		 */
2578		/* Previous state should be NONE. */
2579		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
2580		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2581		    g_mirror_disk_state2str(disk->d_state)));
2582		DISK_STATE_CHANGED();
2583
2584		disk->d_state = state;
2585		if (LIST_EMPTY(&sc->sc_disks))
2586			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
2587		else {
2588			struct g_mirror_disk *dp;
2589
2590			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
2591				if (disk->d_priority >= dp->d_priority) {
2592					LIST_INSERT_BEFORE(dp, disk, d_next);
2593					dp = NULL;
2594					break;
2595				}
2596				if (LIST_NEXT(dp, d_next) == NULL)
2597					break;
2598			}
2599			if (dp != NULL)
2600				LIST_INSERT_AFTER(dp, disk, d_next);
2601		}
2602		G_MIRROR_DEBUG(1, "Device %s: provider %s detected.",
2603		    sc->sc_name, g_mirror_get_diskname(disk));
2604		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2605			break;
2606		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2607		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2608		    g_mirror_device_state2str(sc->sc_state),
2609		    g_mirror_get_diskname(disk),
2610		    g_mirror_disk_state2str(disk->d_state)));
2611		state = g_mirror_determine_state(disk);
2612		if (state != G_MIRROR_DISK_STATE_NONE)
2613			goto again;
2614		break;
2615	case G_MIRROR_DISK_STATE_ACTIVE:
2616		/*
2617		 * Possible scenarios:
2618		 * 1. New disk does not need synchronization.
2619		 * 2. Synchronization process finished successfully.
2620		 */
2621		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2622		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2623		    g_mirror_device_state2str(sc->sc_state),
2624		    g_mirror_get_diskname(disk),
2625		    g_mirror_disk_state2str(disk->d_state)));
2626		/* Previous state should be NEW or SYNCHRONIZING. */
2627		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
2628		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2629		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2630		    g_mirror_disk_state2str(disk->d_state)));
2631		DISK_STATE_CHANGED();
2632
2633		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2634			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2635			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
2636			g_mirror_sync_stop(disk, 0);
2637		}
2638		disk->d_state = state;
2639		disk->d_sync.ds_offset = 0;
2640		disk->d_sync.ds_offset_done = 0;
2641		g_mirror_update_idle(sc, disk);
2642		g_mirror_update_metadata(disk);
2643		G_MIRROR_DEBUG(1, "Device %s: provider %s activated.",
2644		    sc->sc_name, g_mirror_get_diskname(disk));
2645		break;
2646	case G_MIRROR_DISK_STATE_STALE:
2647		/*
2648		 * Possible scenarios:
2649		 * 1. A stale disk was connected.
2650		 */
2651		/* Previous state should be NEW. */
2652		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2653		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2654		    g_mirror_disk_state2str(disk->d_state)));
2655		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2656		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2657		    g_mirror_device_state2str(sc->sc_state),
2658		    g_mirror_get_diskname(disk),
2659		    g_mirror_disk_state2str(disk->d_state)));
2660		/*
2661		 * STALE state is only possible if the device is marked
2662		 * NOAUTOSYNC.
2663		 */
2664		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
2665		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2666		    g_mirror_device_state2str(sc->sc_state),
2667		    g_mirror_get_diskname(disk),
2668		    g_mirror_disk_state2str(disk->d_state)));
2669		DISK_STATE_CHANGED();
2670
2671		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2672		disk->d_state = state;
2673		g_mirror_update_metadata(disk);
2674		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
2675		    sc->sc_name, g_mirror_get_diskname(disk));
2676		break;
2677	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
2678		/*
2679		 * Possible scenarios:
2680		 * 1. Disk which needs synchronization was connected.
2681		 * 1. A disk which needs synchronization was connected.
2682		/* Previous state should be NEW. */
2683		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2684		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2685		    g_mirror_disk_state2str(disk->d_state)));
2686		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2687		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2688		    g_mirror_device_state2str(sc->sc_state),
2689		    g_mirror_get_diskname(disk),
2690		    g_mirror_disk_state2str(disk->d_state)));
2691		DISK_STATE_CHANGED();
2692
2693		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2694			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2695		disk->d_state = state;
2696		if (sc->sc_provider != NULL) {
2697			g_mirror_sync_start(disk);
2698			g_mirror_update_metadata(disk);
2699		}
2700		break;
2701	case G_MIRROR_DISK_STATE_DISCONNECTED:
2702		/*
2703		 * Possible scenarios:
2704		 * 1. Device wasn't running yet, but a disk disappeared.
2705		 * 2. Disk was active and disappeared.
2706		 * 3. Disk disappeared during the synchronization process.
2707		 */
2708		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
2709			/*
2710			 * Previous state should be ACTIVE, STALE or
2711			 * SYNCHRONIZING.
2712			 */
2713			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
2714			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
2715			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2716			    ("Wrong disk state (%s, %s).",
2717			    g_mirror_get_diskname(disk),
2718			    g_mirror_disk_state2str(disk->d_state)));
2719		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
2720			/* Previous state should be NEW. */
2721			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2722			    ("Wrong disk state (%s, %s).",
2723			    g_mirror_get_diskname(disk),
2724			    g_mirror_disk_state2str(disk->d_state)));
2725			/*
2726			 * Reset bumping syncid if disk disappeared in STARTING
2727			 * state.
2728			 */
2729			if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0)
2730				sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
2731#ifdef	INVARIANTS
2732		} else {
2733			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
2734			    sc->sc_name,
2735			    g_mirror_device_state2str(sc->sc_state),
2736			    g_mirror_get_diskname(disk),
2737			    g_mirror_disk_state2str(disk->d_state)));
2738#endif
2739		}
2740		DISK_STATE_CHANGED();
2741		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
2742		    sc->sc_name, g_mirror_get_diskname(disk));
2743
2744		g_mirror_destroy_disk(disk);
2745		break;
2746	case G_MIRROR_DISK_STATE_DESTROY:
2747	    {
2748		int error;
2749
2750		error = g_mirror_clear_metadata(disk);
2751		if (error != 0) {
2752			G_MIRROR_DEBUG(0,
2753			    "Device %s: failed to clear metadata on %s: %d.",
2754			    sc->sc_name, g_mirror_get_diskname(disk), error);
2755			break;
2756		}
2757		DISK_STATE_CHANGED();
2758		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
2759		    sc->sc_name, g_mirror_get_diskname(disk));
2760
2761		g_mirror_destroy_disk(disk);
2762		sc->sc_ndisks--;
2763		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2764			g_mirror_update_metadata(disk);
2765		}
2766		break;
2767	    }
2768	default:
2769		KASSERT(1 == 0, ("Unknown state (%u).", state));
2770		break;
2771	}
2772	return (0);
2773}
2774#undef	DISK_STATE_CHANGED
2775
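/*
 * Read and decode the gmirror metadata from the last sector of the
 * provider attached to the given consumer.  Returns 0 on success and an
 * error number otherwise.
 */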
2776int
2777g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
2778{
2779	struct g_provider *pp;
2780	u_char *buf;
2781	int error;
2782
2783	g_topology_assert();
2784
2785	error = g_access(cp, 1, 0, 0);
2786	if (error != 0)
2787		return (error);
2788	pp = cp->provider;
2789	g_topology_unlock();
2790	/* Metadata are stored in the last sector. */
2791	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
2792	    &error);
2793	g_topology_lock();
2794	g_access(cp, -1, 0, 0);
2795	if (buf == NULL) {
2796		G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).",
2797		    cp->provider->name, error);
2798		return (error);
2799	}
2800
2801	/* Decode metadata. */
2802	error = mirror_metadata_decode(buf, md);
2803	g_free(buf);
2804	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
2805		return (EINVAL);
2806	if (md->md_version > G_MIRROR_VERSION) {
2807		G_MIRROR_DEBUG(0,
2808		    "Kernel module is too old to handle metadata from %s.",
2809		    cp->provider->name);
2810		return (EINVAL);
2811	}
2812	if (error != 0) {
2813		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
2814		    cp->provider->name);
2815		return (error);
2816	}
2817
2818	return (0);
2819}
2820
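/*
 * Validate the metadata of a tasted component against the existing device
 * configuration.  Returns 0 if the component is acceptable, EEXIST for a
 * duplicate disk id and EINVAL for any mismatch.
 */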
2821static int
2822g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2823    struct g_mirror_metadata *md)
2824{
2825
2826	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2827		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2828		    pp->name, md->md_did);
2829		return (EEXIST);
2830	}
2831	if (md->md_all != sc->sc_ndisks) {
2832		G_MIRROR_DEBUG(1,
2833		    "Invalid '%s' field on disk %s (device %s), skipping.",
2834		    "md_all", pp->name, sc->sc_name);
2835		return (EINVAL);
2836	}
2837	if (md->md_slice != sc->sc_slice) {
2838		G_MIRROR_DEBUG(1,
2839		    "Invalid '%s' field on disk %s (device %s), skipping.",
2840		    "md_slice", pp->name, sc->sc_name);
2841		return (EINVAL);
2842	}
2843	if (md->md_balance != sc->sc_balance) {
2844		G_MIRROR_DEBUG(1,
2845		    "Invalid '%s' field on disk %s (device %s), skipping.",
2846		    "md_balance", pp->name, sc->sc_name);
2847		return (EINVAL);
2848	}
2849#if 0
2850	if (md->md_mediasize != sc->sc_mediasize) {
2851		G_MIRROR_DEBUG(1,
2852		    "Invalid '%s' field on disk %s (device %s), skipping.",
2853		    "md_mediasize", pp->name, sc->sc_name);
2854		return (EINVAL);
2855	}
2856#endif
2857	if (sc->sc_mediasize > pp->mediasize) {
2858		G_MIRROR_DEBUG(1,
2859		    "Invalid size of disk %s (device %s), skipping.", pp->name,
2860		    sc->sc_name);
2861		return (EINVAL);
2862	}
2863	if (md->md_sectorsize != sc->sc_sectorsize) {
2864		G_MIRROR_DEBUG(1,
2865		    "Invalid '%s' field on disk %s (device %s), skipping.",
2866		    "md_sectorsize", pp->name, sc->sc_name);
2867		return (EINVAL);
2868	}
2869	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2870		G_MIRROR_DEBUG(1,
2871		    "Invalid sector size of disk %s (device %s), skipping.",
2872		    pp->name, sc->sc_name);
2873		return (EINVAL);
2874	}
2875	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
2876		G_MIRROR_DEBUG(1,
2877		    "Invalid device flags on disk %s (device %s), skipping.",
2878		    pp->name, sc->sc_name);
2879		return (EINVAL);
2880	}
2881	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
2882		G_MIRROR_DEBUG(1,
2883		    "Invalid disk flags on disk %s (device %s), skipping.",
2884		    pp->name, sc->sc_name);
2885		return (EINVAL);
2886	}
2887	return (0);
2888}
2889
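/*
 * Add a tasted component to the device: validate its metadata, reject
 * components with a stale generation id, initialize the disk and pass a
 * NEW-state event to the worker, upgrading on-disk metadata if it was
 * written by an older version.
 */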
2890int
2891g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
2892    struct g_mirror_metadata *md)
2893{
2894	struct g_mirror_disk *disk;
2895	int error;
2896
2897	g_topology_assert_not();
2898	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
2899
2900	error = g_mirror_check_metadata(sc, pp, md);
2901	if (error != 0)
2902		return (error);
2903	if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING &&
2904	    md->md_genid < sc->sc_genid) {
2905		G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.",
2906		    pp->name, sc->sc_name);
2907		return (EINVAL);
2908	}
2909	disk = g_mirror_init_disk(sc, pp, md, &error);
2910	if (disk == NULL)
2911		return (error);
2912	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
2913	    G_MIRROR_EVENT_WAIT);
2914	if (error != 0)
2915		return (error);
2916	if (md->md_version < G_MIRROR_VERSION) {
2917		G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).",
2918		    pp->name, md->md_version, G_MIRROR_VERSION);
2919		g_mirror_update_metadata(disk);
2920	}
2921	return (0);
2922}
2923
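/*
 * GEOM event handler performing a destruction that was deferred until the
 * provider's last close (CLOSEWAIT).
 */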
2924static void
2925g_mirror_destroy_delayed(void *arg, int flag)
2926{
2927	struct g_mirror_softc *sc;
2928	int error;
2929
2930	if (flag == EV_CANCEL) {
2931		G_MIRROR_DEBUG(1, "Destruction canceled.");
2932		return;
2933	}
2934	sc = arg;
2935	g_topology_unlock();
2936	sx_xlock(&sc->sc_lock);
2937	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) == 0,
2938	    ("DESTROY flag set on %s.", sc->sc_name));
2939	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0,
2940	    ("CLOSEWAIT flag not set on %s.", sc->sc_name));
2941	G_MIRROR_DEBUG(1, "Destroying %s (delayed).", sc->sc_name);
2942	error = g_mirror_destroy(sc, G_MIRROR_DESTROY_SOFT);
2943	if (error != 0) {
2944		G_MIRROR_DEBUG(0, "Cannot destroy %s (error=%d).",
2945		    sc->sc_name, error);
2946		sx_xunlock(&sc->sc_lock);
2947	}
2948	g_topology_lock();
2949}
2950
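/*
 * Access method of the mirror provider.  Tracks the number of opens in
 * sc_provider_open, starts idling the device on last write close and
 * schedules delayed destruction once a CLOSEWAIT device is fully closed.
 */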
2951static int
2952g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
2953{
2954	struct g_mirror_softc *sc;
2955	int error = 0;
2956
2957	g_topology_assert();
2958	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
2959	    acw, ace);
2960
2961	sc = pp->private;
2962	KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name));
2963
2964	g_topology_unlock();
2965	sx_xlock(&sc->sc_lock);
2966	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0 ||
2967	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 ||
2968	    LIST_EMPTY(&sc->sc_disks)) {
2969		if (acr > 0 || acw > 0 || ace > 0)
2970			error = ENXIO;
2971		goto end;
2972	}
2973	sc->sc_provider_open += acr + acw + ace;
2974	if (pp->acw + acw == 0)
2975		g_mirror_idle(sc, 0);
2976	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 &&
2977	    sc->sc_provider_open == 0)
2978		g_post_event(g_mirror_destroy_delayed, sc, M_WAITOK, sc, NULL);
2979end:
2980	sx_xunlock(&sc->sc_lock);
2981	g_topology_lock();
2982	return (error);
2983}
2984
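/*
 * Create a new mirror device from the given metadata: set up the action
 * and synchronization geoms, initialize the softc, start the worker thread
 * and arm the startup timeout.
 */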
2985struct g_geom *
2986g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md,
2987    u_int type)
2988{
2989	struct g_mirror_softc *sc;
2990	struct g_geom *gp;
2991	int error, timeout;
2992
2993	g_topology_assert();
2994	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
2995	    md->md_mid);
2996
2997	/* One disk is minimum. */
2998	if (md->md_all < 1)
2999		return (NULL);
3000	/*
3001	 * Action geom.
3002	 */
3003	gp = g_new_geomf(mp, "%s", md->md_name);
3004	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
3005	gp->start = g_mirror_start;
3006	gp->orphan = g_mirror_orphan;
3007	gp->access = g_mirror_access;
3008	gp->dumpconf = g_mirror_dumpconf;
3009
3010	sc->sc_type = type;
3011	sc->sc_id = md->md_mid;
3012	sc->sc_slice = md->md_slice;
3013	sc->sc_balance = md->md_balance;
3014	sc->sc_mediasize = md->md_mediasize;
3015	sc->sc_sectorsize = md->md_sectorsize;
3016	sc->sc_ndisks = md->md_all;
3017	sc->sc_flags = md->md_mflags;
3018	sc->sc_bump_id = 0;
3019	sc->sc_idle = 1;
3020	sc->sc_last_write = time_uptime;
3021	sc->sc_writes = 0;
3022	sc->sc_refcnt = 1;
3023	sx_init(&sc->sc_lock, "gmirror:lock");
3024	TAILQ_INIT(&sc->sc_queue);
3025	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
3026	TAILQ_INIT(&sc->sc_regular_delayed);
3027	TAILQ_INIT(&sc->sc_inflight);
3028	TAILQ_INIT(&sc->sc_sync_delayed);
3029	LIST_INIT(&sc->sc_disks);
3030	TAILQ_INIT(&sc->sc_events);
3031	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
3032	callout_init(&sc->sc_callout, 1);
3033	mtx_init(&sc->sc_done_mtx, "gmirror:done", NULL, MTX_DEF);
3034	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
3035	gp->softc = sc;
3036	sc->sc_geom = gp;
3037	sc->sc_provider = NULL;
3038	sc->sc_provider_open = 0;
3039	/*
3040	 * Synchronization geom.
3041	 */
3042	gp = g_new_geomf(mp, "%s.sync", md->md_name);
3043	gp->softc = sc;
3044	gp->orphan = g_mirror_orphan;
3045	sc->sc_sync.ds_geom = gp;
3046	sc->sc_sync.ds_ndisks = 0;
3047	error = kproc_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
3048	    "g_mirror %s", md->md_name);
3049	if (error != 0) {
3050		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
3051		    sc->sc_name);
3052		g_destroy_geom(sc->sc_sync.ds_geom);
3053		g_destroy_geom(sc->sc_geom);
3054		g_mirror_free_device(sc);
3055		return (NULL);
3056	}
3057
3058	G_MIRROR_DEBUG(1, "Device %s created (%u components, id=%u).",
3059	    sc->sc_name, sc->sc_ndisks, sc->sc_id);
3060
3061	sc->sc_rootmount = root_mount_hold("GMIRROR");
3062	G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount);
3063	/*
3064	 * Run timeout.
3065	 */
3066	timeout = g_mirror_timeout * hz;
3067	callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
3068	return (sc->sc_geom);
3069}
3070
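/*
 * Destroy the device.  'how' selects the behaviour for a provider that is
 * still open: SOFT fails with EBUSY, DELAYED defers the destruction until
 * last close and HARD proceeds anyway.  Called with sc_lock held; the lock
 * is released on success and kept on error.
 */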
3071int
3072g_mirror_destroy(struct g_mirror_softc *sc, int how)
3073{
3074	struct g_mirror_disk *disk;
3075
3076	g_topology_assert_not();
3077	sx_assert(&sc->sc_lock, SX_XLOCKED);
3078
3079	if (sc->sc_provider_open != 0) {
3080		switch (how) {
3081		case G_MIRROR_DESTROY_SOFT:
3082			G_MIRROR_DEBUG(1,
3083			    "Device %s is still open (%d).", sc->sc_name,
3084			    sc->sc_provider_open);
3085			return (EBUSY);
3086		case G_MIRROR_DESTROY_DELAYED:
3087			G_MIRROR_DEBUG(1,
3088			    "Device %s will be destroyed on last close.",
3089			    sc->sc_name);
3090			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
3091				if (disk->d_state ==
3092				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3093					g_mirror_sync_stop(disk, 1);
3094				}
3095			}
3096			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_CLOSEWAIT;
3097			return (EBUSY);
3098		case G_MIRROR_DESTROY_HARD:
3099			G_MIRROR_DEBUG(1, "Device %s is still open, so it "
3100			    "can't be definitively removed.", sc->sc_name);
3101		}
3102	}
3103
3104	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
3105		sx_xunlock(&sc->sc_lock);
3106		return (0);
3107	}
3108	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
3109	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DRAIN;
3110	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
3111	sx_xunlock(&sc->sc_lock);
3112	mtx_lock(&sc->sc_queue_mtx);
3113	wakeup(sc);
3114	mtx_unlock(&sc->sc_queue_mtx);
3115	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
3116	while (sc->sc_worker != NULL)
3117		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
3118	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
3119	sx_xlock(&sc->sc_lock);
3120	g_mirror_destroy_device(sc);
3121	return (0);
3122}
3123
3124static void
3125g_mirror_taste_orphan(struct g_consumer *cp)
3126{
3127
3128	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
3129	    cp->provider->name));
3130}
3131
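/*
 * Taste method: read the metadata from a provider, find or create the
 * matching automatic device and try to add the provider to it as a
 * component.
 */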
3132static struct g_geom *
3133g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
3134{
3135	struct g_mirror_metadata md;
3136	struct g_mirror_softc *sc;
3137	struct g_consumer *cp;
3138	struct g_geom *gp;
3139	int error;
3140
3141	g_topology_assert();
3142	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
3143	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
3144
3145	gp = g_new_geomf(mp, "mirror:taste");
3146	/*
3147	 * This orphan function should never be called.
3148	 */
3149	gp->orphan = g_mirror_taste_orphan;
3150	cp = g_new_consumer(gp);
3151	g_attach(cp, pp);
3152	error = g_mirror_read_metadata(cp, &md);
3153	g_detach(cp);
3154	g_destroy_consumer(cp);
3155	g_destroy_geom(gp);
3156	if (error != 0)
3157		return (NULL);
3158	gp = NULL;
3159
3160	if (md.md_provider[0] != '\0' &&
3161	    !g_compare_names(md.md_provider, pp->name))
3162		return (NULL);
3163	if (md.md_provsize != 0 && md.md_provsize != pp->mediasize)
3164		return (NULL);
3165	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
3166		G_MIRROR_DEBUG(0,
3167		    "Device %s: provider %s marked as inactive, skipping.",
3168		    md.md_name, pp->name);
3169		return (NULL);
3170	}
3171	if (g_mirror_debug >= 2)
3172		mirror_metadata_dump(&md);
3173
3174	/*
3175	 * Let's check if the device already exists.
3176	 */
3177	sc = NULL;
3178	LIST_FOREACH(gp, &mp->geom, geom) {
3179		sc = gp->softc;
3180		if (sc == NULL)
3181			continue;
3182		if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
3183			continue;
3184		if (sc->sc_sync.ds_geom == gp)
3185			continue;
3186		if (strcmp(md.md_name, sc->sc_name) != 0)
3187			continue;
3188		if (md.md_mid != sc->sc_id) {
3189			G_MIRROR_DEBUG(0, "Device %s already configured.",
3190			    sc->sc_name);
3191			return (NULL);
3192		}
3193		break;
3194	}
3195	if (gp == NULL) {
3196		gp = g_mirror_create(mp, &md, G_MIRROR_TYPE_AUTOMATIC);
3197		if (gp == NULL) {
3198			G_MIRROR_DEBUG(0, "Cannot create device %s.",
3199			    md.md_name);
3200			return (NULL);
3201		}
3202		sc = gp->softc;
3203	}
3204	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
3205	g_topology_unlock();
3206	sx_xlock(&sc->sc_lock);
3207	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_TASTING;
3208	error = g_mirror_add_disk(sc, pp, &md);
3209	if (error != 0) {
3210		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
3211		    pp->name, gp->name, error);
3212		if (LIST_EMPTY(&sc->sc_disks)) {
3213			g_cancel_event(sc);
3214			g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3215			g_topology_lock();
3216			return (NULL);
3217		}
3218		gp = NULL;
3219	}
3220	sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_TASTING;
3221	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
3222		g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3223		g_topology_lock();
3224		return (NULL);
3225	}
3226	sx_xunlock(&sc->sc_lock);
3227	g_topology_lock();
3228	return (gp);
3229}
3230
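/*
 * Resize method: a component's provider changed size, so rewrite the
 * metadata, which lives in the provider's last sector.
 */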
3231static void
3232g_mirror_resize(struct g_consumer *cp)
3233{
3234	struct g_mirror_disk *disk;
3235
3236	g_topology_assert();
3237	g_trace(G_T_TOPOLOGY, "%s(%s)", __func__, cp->provider->name);
3238
3239	disk = cp->private;
3240	if (disk == NULL)
3241		return;
3242	g_topology_unlock();
3243	g_mirror_update_metadata(disk);
3244	g_topology_lock();
3245}
3246
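/*
 * Class destroy_geom method: soft destruction of the device, which fails
 * with EBUSY while the provider is open.
 */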
3247static int
3248g_mirror_destroy_geom(struct gctl_req *req __unused,
3249    struct g_class *mp __unused, struct g_geom *gp)
3250{
3251	struct g_mirror_softc *sc;
3252	int error;
3253
3254	g_topology_unlock();
3255	sc = gp->softc;
3256	sx_xlock(&sc->sc_lock);
3257	g_cancel_event(sc);
3258	error = g_mirror_destroy(gp->softc, G_MIRROR_DESTROY_SOFT);
3259	if (error != 0)
3260		sx_xunlock(&sc->sc_lock);
3261	g_topology_lock();
3262	return (error);
3263}
3264
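/*
 * Export device and per-disk state as XML for the GEOM configuration
 * tree.
 */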
3265static void
3266g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
3267    struct g_consumer *cp, struct g_provider *pp)
3268{
3269	struct g_mirror_softc *sc;
3270
3271	g_topology_assert();
3272
3273	sc = gp->softc;
3274	if (sc == NULL)
3275		return;
3276	/* Skip synchronization geom. */
3277	if (gp == sc->sc_sync.ds_geom)
3278		return;
3279	if (pp != NULL) {
3280		/* Nothing here. */
3281	} else if (cp != NULL) {
3282		struct g_mirror_disk *disk;
3283
3284		disk = cp->private;
3285		if (disk == NULL)
3286			return;
3287		g_topology_unlock();
3288		sx_xlock(&sc->sc_lock);
3289		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
3290		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3291			sbuf_printf(sb, "%s<Synchronized>", indent);
3292			if (disk->d_sync.ds_offset == 0)
3293				sbuf_printf(sb, "0%%");
3294			else {
3295				sbuf_printf(sb, "%u%%",
3296				    (u_int)((disk->d_sync.ds_offset * 100) /
3297				    sc->sc_provider->mediasize));
3298			}
3299			sbuf_printf(sb, "</Synchronized>\n");
3300			if (disk->d_sync.ds_offset > 0) {
3301				sbuf_printf(sb, "%s<BytesSynced>%jd"
3302				    "</BytesSynced>\n", indent,
3303				    (intmax_t)disk->d_sync.ds_offset);
3304			}
3305		}
3306		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
3307		    disk->d_sync.ds_syncid);
3308		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent,
3309		    disk->d_genid);
3310		sbuf_printf(sb, "%s<Flags>", indent);
3311		if (disk->d_flags == 0)
3312			sbuf_printf(sb, "NONE");
3313		else {
3314			int first = 1;
3315
3316#define	ADD_FLAG(flag, name)	do {					\
3317	if ((disk->d_flags & (flag)) != 0) {				\
3318		if (!first)						\
3319			sbuf_printf(sb, ", ");				\
3320		else							\
3321			first = 0;					\
3322		sbuf_printf(sb, name);					\
3323	}								\
3324} while (0)
3325			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
3326			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
3327			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
3328			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
3329			    "SYNCHRONIZING");
3330			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
3331			ADD_FLAG(G_MIRROR_DISK_FLAG_BROKEN, "BROKEN");
3332#undef	ADD_FLAG
3333		}
3334		sbuf_printf(sb, "</Flags>\n");
3335		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
3336		    disk->d_priority);
3337		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
3338		    g_mirror_disk_state2str(disk->d_state));
3339		sx_xunlock(&sc->sc_lock);
3340		g_topology_lock();
3341	} else {
3342		g_topology_unlock();
3343		sx_xlock(&sc->sc_lock);
3344		sbuf_printf(sb, "%s<Type>", indent);
3345		switch (sc->sc_type) {
3346		case G_MIRROR_TYPE_AUTOMATIC:
3347			sbuf_printf(sb, "AUTOMATIC");
3348			break;
3349		case G_MIRROR_TYPE_MANUAL:
3350			sbuf_printf(sb, "MANUAL");
3351			break;
3352		default:
3353			sbuf_printf(sb, "UNKNOWN");
3354			break;
3355		}
3356		sbuf_printf(sb, "</Type>\n");
3357		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
3358		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
3359		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid);
3360		sbuf_printf(sb, "%s<Flags>", indent);
3361		if (sc->sc_flags == 0)
3362			sbuf_printf(sb, "NONE");
3363		else {
3364			int first = 1;
3365
3366#define	ADD_FLAG(flag, name)	do {					\
3367	if ((sc->sc_flags & (flag)) != 0) {				\
3368		if (!first)						\
3369			sbuf_printf(sb, ", ");				\
3370		else							\
3371			first = 0;					\
3372		sbuf_printf(sb, name);					\
3373	}								\
3374} while (0)
3375			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC");
3376			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
3377#undef	ADD_FLAG
3378		}
3379		sbuf_printf(sb, "</Flags>\n");
3380		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
3381		    (u_int)sc->sc_slice);
3382		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
3383		    balance_name(sc->sc_balance));
3384		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
3385		    sc->sc_ndisks);
3386		sbuf_printf(sb, "%s<State>", indent);
3387		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
3388			sbuf_printf(sb, "%s", "STARTING");
3389		else if (sc->sc_ndisks ==
3390		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
3391			sbuf_printf(sb, "%s", "COMPLETE");
3392		else
3393			sbuf_printf(sb, "%s", "DEGRADED");
3394		sbuf_printf(sb, "</State>\n");
3395		sx_xunlock(&sc->sc_lock);
3396		g_topology_lock();
3397	}
3398}
3399
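/*
 * shutdown_post_sync event handler: idle all mirror devices and schedule
 * their delayed destruction at system shutdown.
 */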
3400static void
3401g_mirror_shutdown_post_sync(void *arg, int howto)
3402{
3403	struct g_class *mp;
3404	struct g_geom *gp, *gp2;
3405	struct g_mirror_softc *sc;
3406	int error;
3407
3408	if (panicstr != NULL)
3409		return;
3410
3411	mp = arg;
3412	g_topology_lock();
3413	g_mirror_shutdown = 1;
3414	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
3415		if ((sc = gp->softc) == NULL)
3416			continue;
3417		/* Skip synchronization geom. */
3418		if (gp == sc->sc_sync.ds_geom)
3419			continue;
3420		g_topology_unlock();
3421		sx_xlock(&sc->sc_lock);
3422		g_mirror_idle(sc, -1);
3423		g_cancel_event(sc);
3424		error = g_mirror_destroy(sc, G_MIRROR_DESTROY_DELAYED);
3425		if (error != 0)
3426			sx_xunlock(&sc->sc_lock);
3427		g_topology_lock();
3428	}
3429	g_topology_unlock();
3430}
3431
3432static void
3433g_mirror_init(struct g_class *mp)
3434{
3435
3436	g_mirror_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync,
3437	    g_mirror_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST);
3438	if (g_mirror_post_sync == NULL)
3439		G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event.");
3440}
3441
3442static void
3443g_mirror_fini(struct g_class *mp)
3444{
3445
3446	if (g_mirror_post_sync != NULL)
3447		EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_post_sync);
3448}
3449
3450DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
3451