/*-
 * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/geom/mirror/g_mirror.c 327493 2018-01-02 16:19:41Z markj $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fail.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/bio.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/eventhandler.h>
#include <vm/uma.h>
#include <geom/geom.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/sched.h>
#include <geom/mirror/g_mirror.h>

FEATURE(geom_mirror, "GEOM mirroring support");

static MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data");

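/*
 * The knobs below appear under the kern.geom.mirror sysctl node; the RWTUN
 * and RDTUN flags also make them settable as loader(8) tunables, e.g.
 * kern.geom.mirror.debug=2 in loader.conf(5).
 */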
SYSCTL_DECL(_kern_geom);
static SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0,
    "GEOM_MIRROR configuration");
int g_mirror_debug = 0;
SYSCTL_INT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RWTUN, &g_mirror_debug, 0,
    "Debug level");
static u_int g_mirror_timeout = 4;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RWTUN, &g_mirror_timeout,
    0, "Time in seconds to wait for all mirror components on startup");
static u_int g_mirror_idletime = 5;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RWTUN,
    &g_mirror_idletime, 0,
    "Seconds of write inactivity before components are marked clean");
static u_int g_mirror_disconnect_on_failure = 1;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, disconnect_on_failure, CTLFLAG_RWTUN,
    &g_mirror_disconnect_on_failure, 0, "Disconnect component on I/O failure.");
static u_int g_mirror_syncreqs = 2;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_requests, CTLFLAG_RDTUN,
    &g_mirror_syncreqs, 0, "Parallel synchronization I/O requests.");
static u_int g_mirror_sync_period = 5;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_update_period, CTLFLAG_RWTUN,
    &g_mirror_sync_period, 0,
    "Metadata update period during synchronization, in seconds");

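/*
 * Wrapper around msleep(9) that logs the sleep and wakeup of "ident" at
 * debug level 4, so thread stalls can be traced via kern.geom.mirror.debug.
 */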
#define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
} while (0)

static eventhandler_tag g_mirror_post_sync = NULL;
static int g_mirror_shutdown = 0;

static g_ctl_destroy_geom_t g_mirror_destroy_geom;
static g_taste_t g_mirror_taste;
static g_init_t g_mirror_init;
static g_fini_t g_mirror_fini;
static g_provgone_t g_mirror_providergone;
static g_resize_t g_mirror_resize;

struct g_class g_mirror_class = {
	.name = G_MIRROR_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_mirror_config,
	.taste = g_mirror_taste,
	.destroy_geom = g_mirror_destroy_geom,
	.init = g_mirror_init,
	.fini = g_mirror_fini,
	.providergone = g_mirror_providergone,
	.resize = g_mirror_resize
};


static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
static void g_mirror_update_device(struct g_mirror_softc *sc, bool force);
static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
static void g_mirror_register_request(struct bio *bp);
static void g_mirror_sync_release(struct g_mirror_softc *sc);


static const char *
g_mirror_disk_state2str(int state)
{

	switch (state) {
	case G_MIRROR_DISK_STATE_NONE:
		return ("NONE");
	case G_MIRROR_DISK_STATE_NEW:
		return ("NEW");
	case G_MIRROR_DISK_STATE_ACTIVE:
		return ("ACTIVE");
	case G_MIRROR_DISK_STATE_STALE:
		return ("STALE");
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		return ("SYNCHRONIZING");
	case G_MIRROR_DISK_STATE_DISCONNECTED:
		return ("DISCONNECTED");
	case G_MIRROR_DISK_STATE_DESTROY:
		return ("DESTROY");
	default:
		return ("INVALID");
	}
}

static const char *
g_mirror_device_state2str(int state)
{

	switch (state) {
	case G_MIRROR_DEVICE_STATE_STARTING:
		return ("STARTING");
	case G_MIRROR_DEVICE_STATE_RUNNING:
		return ("RUNNING");
	default:
		return ("INVALID");
	}
}

static const char *
g_mirror_get_diskname(struct g_mirror_disk *disk)
{

	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
		return ("[unknown]");
	return (disk->d_name);
}

/*
 * --- Event handling functions ---
 * Events in geom_mirror are used to maintain the status of disks and the
 * device from a single thread, which simplifies locking.
 */
static void
g_mirror_event_free(struct g_mirror_event *ep)
{

	free(ep, M_MIRROR);
}

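/*
 * Post an event for the worker thread.  With G_MIRROR_EVENT_DEVICE set, "arg"
 * is the softc itself, otherwise it is the disk the event applies to.  Unless
 * G_MIRROR_EVENT_DONTWAIT is given, sleep until the worker has processed the
 * event and return its error status.
 */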
int
g_mirror_event_send(void *arg, int state, int flags)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	int error;

	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
		disk = NULL;
		sc = arg;
	} else {
		disk = arg;
		sc = disk->d_softc;
	}
	ep->e_disk = disk;
	ep->e_state = state;
	ep->e_flags = flags;
	ep->e_error = 0;
	mtx_lock(&sc->sc_events_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	mtx_lock(&sc->sc_queue_mtx);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
		return (0);
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
	sx_xunlock(&sc->sc_lock);
	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
		mtx_lock(&sc->sc_events_mtx);
		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
		    hz * 5);
	}
	error = ep->e_error;
	g_mirror_event_free(ep);
	sx_xlock(&sc->sc_lock);
	return (error);
}

static struct g_mirror_event *
g_mirror_event_first(struct g_mirror_softc *sc)
{
	struct g_mirror_event *ep;

	mtx_lock(&sc->sc_events_mtx);
	ep = TAILQ_FIRST(&sc->sc_events);
	mtx_unlock(&sc->sc_events_mtx);
	return (ep);
}

static void
g_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep)
{

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_REMOVE(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
}

static void
g_mirror_event_cancel(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_mirror_event *ep, *tmpep;

	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
			continue;
		if (ep->e_disk != disk)
			continue;
		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			wakeup(ep);
		}
	}
	mtx_unlock(&sc->sc_events_mtx);
}

/*
 * Return the number of disks in the given state.
 * If state is equal to -1, count all connected disks.
 */
u_int
g_mirror_ndisks(struct g_mirror_softc *sc, int state)
{
	struct g_mirror_disk *disk;
	u_int n = 0;

	sx_assert(&sc->sc_lock, SX_LOCKED);

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (state == -1 || disk->d_state == state)
			n++;
	}
	return (n);
}

/*
 * Find a disk in the mirror by its disk ID.
 */
static struct g_mirror_disk *
g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
{
	struct g_mirror_disk *disk;

	sx_assert(&sc->sc_lock, SX_XLOCKED);

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_id == id)
			return (disk);
	}
	return (NULL);
}

static u_int
g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
{
	struct bio *bp;
	u_int nreqs = 0;

	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_FOREACH(bp, &sc->sc_queue, bio_queue) {
		if (bp->bio_from == cp)
			nreqs++;
	}
	mtx_unlock(&sc->sc_queue_mtx);
	return (nreqs);
}

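/*
 * A consumer is considered busy while it still has in-flight requests
 * (counted in cp->index) or bios for it sitting on the device queue.
 */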
static int
g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	if (cp->index > 0) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s exist, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	if (g_mirror_nrequests(sc, cp) > 0) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s in queue, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	return (0);
}

static void
g_mirror_destroy_consumer(void *arg, int flags __unused)
{
	struct g_consumer *cp;

	g_topology_assert();

	cp = arg;
	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}

static void
g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
{
	struct g_provider *pp;
	int retaste_wait;

	g_topology_assert();

	cp->private = NULL;
	if (g_mirror_is_busy(sc, cp))
		return;
	pp = cp->provider;
	retaste_wait = 0;
	if (cp->acw == 1) {
		if ((pp->geom->flags & G_GEOM_WITHER) == 0)
			retaste_wait = 1;
	}
	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
	    -cp->acw, -cp->ace, 0);
	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
	if (retaste_wait) {
		/*
		 * After the retaste event was sent (inside g_access()), we can
		 * send the event to detach and destroy the consumer.
		 * A class which has a consumer attached to the given provider
		 * will not receive a retaste event for that provider.
		 * This is how retaste events are ignored when consumers opened
		 * for writing are closed: the consumer is detached and
		 * destroyed after the retaste event is sent.
		 */
		g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL);
		return;
	}
	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}

static int
g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
{
	struct g_consumer *cp;
	int error;

	g_topology_assert_not();
	KASSERT(disk->d_consumer == NULL,
	    ("Disk already connected (device %s).", disk->d_softc->sc_name));

	g_topology_lock();
	cp = g_new_consumer(disk->d_softc->sc_geom);
	cp->flags |= G_CF_DIRECT_RECEIVE;
	error = g_attach(cp, pp);
	if (error != 0) {
		g_destroy_consumer(cp);
		g_topology_unlock();
		return (error);
	}
	error = g_access(cp, 1, 1, 1);
	if (error != 0) {
		g_detach(cp);
		g_destroy_consumer(cp);
		g_topology_unlock();
		G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).",
		    pp->name, error);
		return (error);
	}
	g_topology_unlock();
	disk->d_consumer = cp;
	disk->d_consumer->private = disk;
	disk->d_consumer->index = 0;

	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
	return (0);
}

static void
g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	g_topology_assert();

	if (cp == NULL)
		return;
	if (cp->provider != NULL)
		g_mirror_kill_consumer(sc, cp);
	else
		g_destroy_consumer(cp);
}

/*
 * Initialize a disk. This means allocate memory, create a consumer, attach it
 * to the provider and open access (r1w1e1) to it.
 */
static struct g_mirror_disk *
g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
    struct g_mirror_metadata *md, int *errorp)
{
	struct g_mirror_disk *disk;
	int i, error;

	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
	if (disk == NULL) {
		error = ENOMEM;
		goto fail;
	}
	disk->d_softc = sc;
	error = g_mirror_connect_disk(disk, pp);
	if (error != 0)
		goto fail;
	disk->d_id = md->md_did;
	disk->d_state = G_MIRROR_DISK_STATE_NONE;
	disk->d_priority = md->md_priority;
	disk->d_flags = md->md_dflags;
	error = g_getattr("GEOM::candelete", disk->d_consumer, &i);
	if (error == 0 && i != 0)
		disk->d_flags |= G_MIRROR_DISK_FLAG_CANDELETE;
	if (md->md_provider[0] != '\0')
		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
	disk->d_sync.ds_consumer = NULL;
	disk->d_sync.ds_offset = md->md_sync_offset;
	disk->d_sync.ds_offset_done = md->md_sync_offset;
	disk->d_sync.ds_update_ts = time_uptime;
	disk->d_genid = md->md_genid;
	disk->d_sync.ds_syncid = md->md_syncid;
	if (errorp != NULL)
		*errorp = 0;
	return (disk);
fail:
	if (errorp != NULL)
		*errorp = error;
	if (disk != NULL)
		free(disk, M_MIRROR);
	return (NULL);
}

static void
g_mirror_destroy_disk(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	LIST_REMOVE(disk, d_next);
	g_mirror_event_cancel(disk);
	if (sc->sc_hint == disk)
		sc->sc_hint = NULL;
	switch (disk->d_state) {
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		g_mirror_sync_stop(disk, 1);
		/* FALLTHROUGH */
	case G_MIRROR_DISK_STATE_NEW:
	case G_MIRROR_DISK_STATE_STALE:
	case G_MIRROR_DISK_STATE_ACTIVE:
		g_topology_lock();
		g_mirror_disconnect_consumer(sc, disk->d_consumer);
		g_topology_unlock();
		free(disk, M_MIRROR);
		break;
	default:
		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
	}
}

static void
g_mirror_free_device(struct g_mirror_softc *sc)
{

	mtx_destroy(&sc->sc_queue_mtx);
	mtx_destroy(&sc->sc_events_mtx);
	mtx_destroy(&sc->sc_done_mtx);
	sx_destroy(&sc->sc_lock);
	free(sc, M_MIRROR);
}

static void
g_mirror_providergone(struct g_provider *pp)
{
	struct g_mirror_softc *sc = pp->private;

	if ((--sc->sc_refcnt) == 0)
		g_mirror_free_device(sc);
}

static void
g_mirror_destroy_device(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	struct g_geom *gp;
	struct g_consumer *cp, *tmpcp;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	gp = sc->sc_geom;
	if (sc->sc_provider != NULL)
		g_mirror_destroy_provider(sc);
	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
	    disk = LIST_FIRST(&sc->sc_disks)) {
		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
		g_mirror_destroy_disk(disk);
	}
	while ((ep = g_mirror_event_first(sc)) != NULL) {
		g_mirror_event_remove(sc, ep);
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			ep->e_flags |= G_MIRROR_EVENT_DONE;
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
			mtx_lock(&sc->sc_events_mtx);
			wakeup(ep);
			mtx_unlock(&sc->sc_events_mtx);
		}
	}
	callout_drain(&sc->sc_callout);

	g_topology_lock();
	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
		g_mirror_disconnect_consumer(sc, cp);
	}
	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
	g_wither_geom(gp, ENXIO);
	sx_xunlock(&sc->sc_lock);
	if ((--sc->sc_refcnt) == 0)
		g_mirror_free_device(sc);
	g_topology_unlock();
}

static void
g_mirror_orphan(struct g_consumer *cp)
{
	struct g_mirror_disk *disk;

	g_topology_assert();

	disk = cp->private;
	if (disk == NULL)
		return;
	disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
	    G_MIRROR_EVENT_DONTWAIT);
}

/*
 * Return the next active disk on the list.
 * It is possible that the disk returned will be the same as the given one.
 * If there are no active disks on the list, NULL is returned.
 */
static __inline struct g_mirror_disk *
g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
{
	struct g_mirror_disk *dp;

	for (dp = LIST_NEXT(disk, d_next); dp != disk;
	    dp = LIST_NEXT(dp, d_next)) {
		if (dp == NULL)
			dp = LIST_FIRST(&sc->sc_disks);
		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
			break;
	}
	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
		return (NULL);
	return (dp);
}

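/*
 * Round-robin selector: return an ACTIVE disk and advance sc_hint to the next
 * one, so that consecutive reads rotate across the mirror's components.
 */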
static struct g_mirror_disk *
g_mirror_get_disk(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	if (sc->sc_hint == NULL) {
		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
		if (sc->sc_hint == NULL)
			return (NULL);
	}
	disk = sc->sc_hint;
	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
		disk = g_mirror_find_next(sc, disk);
		if (disk == NULL)
			return (NULL);
	}
	sc->sc_hint = g_mirror_find_next(sc, disk);
	return (disk);
}

static int
g_mirror_write_metadata(struct g_mirror_disk *disk,
    struct g_mirror_metadata *md)
{
	struct g_mirror_softc *sc;
	struct g_consumer *cp;
	off_t offset, length;
	u_char *sector;
	int error = 0;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	cp = disk->d_consumer;
	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	length = cp->provider->sectorsize;
	offset = cp->provider->mediasize - length;
	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
	if (md != NULL &&
	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0) {
		/*
		 * Handle the case when the size of the parent provider was
		 * reduced.
		 */
		if (offset < md->md_mediasize)
			error = ENOSPC;
		else
			mirror_metadata_encode(md, sector);
	}
	KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_metadata_write, error);
	if (error == 0)
		error = g_write_data(cp, offset, sector, length);
	free(sector, M_MIRROR);
	if (error != 0) {
		if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
			disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
			G_MIRROR_DEBUG(0, "Cannot write metadata on %s "
			    "(device=%s, error=%d).",
			    g_mirror_get_diskname(disk), sc->sc_name, error);
		} else {
			G_MIRROR_DEBUG(1, "Cannot write metadata on %s "
			    "(device=%s, error=%d).",
			    g_mirror_get_diskname(disk), sc->sc_name, error);
		}
		if (g_mirror_disconnect_on_failure &&
		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
		}
	}
	return (error);
}

static int
g_mirror_clear_metadata(struct g_mirror_disk *disk)
{
	int error;

	g_topology_assert_not();
	sx_assert(&disk->d_softc->sc_lock, SX_LOCKED);

	if (disk->d_softc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
		return (0);
	error = g_mirror_write_metadata(disk, NULL);
	if (error == 0) {
		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
		    g_mirror_get_diskname(disk));
	} else {
		G_MIRROR_DEBUG(0,
		    "Cannot clear metadata on disk %s (error=%d).",
		    g_mirror_get_diskname(disk), error);
	}
	return (error);
}

void
g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
    struct g_mirror_metadata *md)
{

	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
	md->md_version = G_MIRROR_VERSION;
	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
	md->md_mid = sc->sc_id;
	md->md_all = sc->sc_ndisks;
	md->md_slice = sc->sc_slice;
	md->md_balance = sc->sc_balance;
	md->md_genid = sc->sc_genid;
	md->md_mediasize = sc->sc_mediasize;
	md->md_sectorsize = sc->sc_sectorsize;
	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
	bzero(md->md_provider, sizeof(md->md_provider));
	if (disk == NULL) {
		md->md_did = arc4random();
		md->md_priority = 0;
		md->md_syncid = 0;
		md->md_dflags = 0;
		md->md_sync_offset = 0;
		md->md_provsize = 0;
	} else {
		md->md_did = disk->d_id;
		md->md_priority = disk->d_priority;
		md->md_syncid = disk->d_sync.ds_syncid;
		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
			md->md_sync_offset = disk->d_sync.ds_offset_done;
		else
			md->md_sync_offset = 0;
		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
			strlcpy(md->md_provider,
			    disk->d_consumer->provider->name,
			    sizeof(md->md_provider));
		}
		md->md_provsize = disk->d_consumer->provider->mediasize;
	}
}

void
g_mirror_update_metadata(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_mirror_metadata md;
	int error;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
		return;
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0)
		g_mirror_fill_metadata(sc, disk, &md);
	error = g_mirror_write_metadata(disk, &md);
	if (error == 0) {
		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
		    g_mirror_get_diskname(disk));
	} else {
		G_MIRROR_DEBUG(0,
		    "Cannot update metadata on disk %s (error=%d).",
		    g_mirror_get_diskname(disk), error);
	}
}

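/*
 * The sync ID is bumped when components go stale and need to be
 * resynchronized; the generation ID is bumped when a component fails fatally,
 * so that an outdated component can be recognized later and not trusted.
 */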
static void
g_mirror_bump_syncid(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
	    ("%s called with no active disks (device=%s).", __func__,
	    sc->sc_name));

	sc->sc_syncid++;
	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
	    sc->sc_syncid);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_sync.ds_syncid = sc->sc_syncid;
			g_mirror_update_metadata(disk);
		}
	}
}

static void
g_mirror_bump_genid(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
	    ("%s called with no active disks (device=%s).", __func__,
	    sc->sc_name));

	sc->sc_genid++;
	G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name,
	    sc->sc_genid);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_genid = sc->sc_genid;
			g_mirror_update_metadata(disk);
		}
	}
}

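/*
 * Mark the active components clean if there has been no write activity for
 * g_mirror_idletime seconds.  Return 0 when the array was marked clean (or
 * marking is not applicable), otherwise the number of seconds to wait before
 * trying again.
 */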
static int
g_mirror_idle(struct g_mirror_softc *sc, int acw)
{
	struct g_mirror_disk *disk;
	int timeout;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if (sc->sc_provider == NULL)
		return (0);
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
		return (0);
	if (sc->sc_idle)
		return (0);
	if (sc->sc_writes > 0)
		return (0);
	if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) {
		timeout = g_mirror_idletime - (time_uptime - sc->sc_last_write);
		if (!g_mirror_shutdown && timeout > 0)
			return (timeout);
	}
	sc->sc_idle = 1;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as clean.",
		    g_mirror_get_diskname(disk), sc->sc_name);
		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
	}
	return (0);
}

static void
g_mirror_unidle(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
		return;
	sc->sc_idle = 0;
	sc->sc_last_write = time_uptime;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as dirty.",
		    g_mirror_get_diskname(disk), sc->sc_name);
		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
	}
}

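/*
 * Completion handler for cloned BIO_FLUSH requests: once every child has
 * returned, deliver the parent bio with the first error seen, if any.
 */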
static void
g_mirror_flush_done(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct bio *pbp;

	pbp = bp->bio_parent;
	sc = pbp->bio_to->private;
	mtx_lock(&sc->sc_done_mtx);
	if (pbp->bio_error == 0)
		pbp->bio_error = bp->bio_error;
	pbp->bio_completed += bp->bio_completed;
	pbp->bio_inbed++;
	if (pbp->bio_children == pbp->bio_inbed) {
		mtx_unlock(&sc->sc_done_mtx);
		g_io_deliver(pbp, pbp->bio_error);
	} else
		mtx_unlock(&sc->sc_done_mtx);
	g_destroy_bio(bp);
}

static void
g_mirror_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_from->geom->softc;
	bp->bio_cflags = G_MIRROR_BIO_FLAG_REGULAR;
	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);
}

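/*
 * Complete a regular (non-synchronization) child request.  On success the
 * parent is delivered once all children are in.  On a read error the parent
 * is requeued so another component can be tried; on a write error the failed
 * child is discounted, so the write still succeeds if any component took it.
 */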
static void
g_mirror_regular_request(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct bio *pbp;

	g_topology_assert_not();

	pbp = bp->bio_parent;
	sc = pbp->bio_to->private;
	bp->bio_from->index--;
	if (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_DELETE)
		sc->sc_writes--;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
	}

	if (bp->bio_cmd == BIO_READ)
		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_read,
		    bp->bio_error);
	else if (bp->bio_cmd == BIO_WRITE)
		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_write,
		    bp->bio_error);

	pbp->bio_inbed++;
	KASSERT(pbp->bio_inbed <= pbp->bio_children,
	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
	    pbp->bio_children));
	if (bp->bio_error == 0 && pbp->bio_error == 0) {
		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
		g_destroy_bio(bp);
		if (pbp->bio_children == pbp->bio_inbed) {
			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
			pbp->bio_completed = pbp->bio_length;
			if (pbp->bio_cmd == BIO_WRITE ||
			    pbp->bio_cmd == BIO_DELETE) {
				TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue);
				/* Release delayed sync requests if possible. */
				g_mirror_sync_release(sc);
			}
			g_io_deliver(pbp, pbp->bio_error);
		}
		return;
	} else if (bp->bio_error != 0) {
		if (pbp->bio_error == 0)
			pbp->bio_error = bp->bio_error;
		if (disk != NULL) {
			if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
				disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
				G_MIRROR_LOGREQ(0, bp,
				    "Request failed (error=%d).",
				    bp->bio_error);
			} else {
				G_MIRROR_LOGREQ(1, bp,
				    "Request failed (error=%d).",
				    bp->bio_error);
			}
			if (g_mirror_disconnect_on_failure &&
			    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1)
			{
				if (bp->bio_error == ENXIO &&
				    bp->bio_cmd == BIO_READ)
					sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
				else if (bp->bio_error == ENXIO)
					sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID_NOW;
				else
					sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
				g_mirror_event_send(disk,
				    G_MIRROR_DISK_STATE_DISCONNECTED,
				    G_MIRROR_EVENT_DONTWAIT);
			}
		}
		switch (pbp->bio_cmd) {
		case BIO_DELETE:
		case BIO_WRITE:
			pbp->bio_inbed--;
			pbp->bio_children--;
			break;
		}
	}
	g_destroy_bio(bp);

	switch (pbp->bio_cmd) {
	case BIO_READ:
		if (pbp->bio_inbed < pbp->bio_children)
			break;
		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 1)
			g_io_deliver(pbp, pbp->bio_error);
		else {
			pbp->bio_error = 0;
			mtx_lock(&sc->sc_queue_mtx);
			TAILQ_INSERT_TAIL(&sc->sc_queue, pbp, bio_queue);
			mtx_unlock(&sc->sc_queue_mtx);
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
			wakeup(sc);
		}
		break;
	case BIO_DELETE:
	case BIO_WRITE:
		if (pbp->bio_children == 0) {
			/*
			 * All requests failed.
			 */
		} else if (pbp->bio_inbed < pbp->bio_children) {
			/* Do nothing. */
			break;
		} else if (pbp->bio_children == pbp->bio_inbed) {
			/* Some requests succeeded. */
			pbp->bio_error = 0;
			pbp->bio_completed = pbp->bio_length;
		}
		TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue);
		/* Release delayed sync requests if possible. */
		g_mirror_sync_release(sc);
		g_io_deliver(pbp, pbp->bio_error);
		break;
	default:
		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
		break;
	}
}

static void
g_mirror_sync_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
	sc = bp->bio_from->geom->softc;
	bp->bio_cflags = G_MIRROR_BIO_FLAG_SYNC;
	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);
}

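/*
 * Answer the GEOM::candelete attribute: BIO_DELETE is supported if at least
 * one component supports it.
 */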
static void
g_mirror_candelete(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	int *val;

	sc = bp->bio_to->private;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE)
			break;
	}
	val = (int *)bp->bio_data;
	*val = (disk != NULL);
	g_io_deliver(bp, 0);
}

static void
g_mirror_kernel_dump(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct bio *cbp;
	struct g_kerneldump *gkd;

	/*
	 * We configure dumping to the first component, because this component
	 * will be used for reading with the 'prefer' balance algorithm.
	 * If the component with the highest priority is currently disconnected
	 * we will not be able to read the dump after the reboot, even if that
	 * component is connected and synchronized later. Can we do something
	 * better?
	 */
	sc = bp->bio_to->private;
	disk = LIST_FIRST(&sc->sc_disks);

	gkd = (struct g_kerneldump *)bp->bio_data;
	if (gkd->length > bp->bio_to->mediasize)
		gkd->length = bp->bio_to->mediasize;
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		g_io_deliver(bp, ENOMEM);
		return;
	}
	cbp->bio_done = g_std_done;
	g_io_request(cbp, disk->d_consumer);
	G_MIRROR_DEBUG(1, "Kernel dump will go to %s.",
	    g_mirror_get_diskname(disk));
}

static void
g_mirror_flush(struct g_mirror_softc *sc, struct bio *bp)
{
	struct bio_queue queue;
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	TAILQ_INIT(&queue);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		cbp = g_clone_bio(bp);
		if (cbp == NULL) {
			while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
				TAILQ_REMOVE(&queue, cbp, bio_queue);
				g_destroy_bio(cbp);
			}
			if (bp->bio_error == 0)
				bp->bio_error = ENOMEM;
			g_io_deliver(bp, bp->bio_error);
			return;
		}
		TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
		cbp->bio_done = g_mirror_flush_done;
		cbp->bio_caller1 = disk;
		cbp->bio_to = disk->d_consumer->provider;
	}
	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
		TAILQ_REMOVE(&queue, cbp, bio_queue);
		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
		disk = cbp->bio_caller1;
		cbp->bio_caller1 = NULL;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		g_io_request(cbp, disk->d_consumer);
	}
}

static void
g_mirror_start(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_to->private;
	/*
	 * If sc == NULL or there are no valid disks, the provider's error
	 * should be set and g_mirror_start() should not be called at all.
	 */
	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
	    ("Provider's error should be set (error=%d)(mirror=%s).",
	    bp->bio_to->error, bp->bio_to->name));
	G_MIRROR_LOGREQ(3, bp, "Request received.");

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		break;
	case BIO_FLUSH:
		g_mirror_flush(sc, bp);
		return;
	case BIO_GETATTR:
		if (!strcmp(bp->bio_attribute, "GEOM::candelete")) {
			g_mirror_candelete(bp);
			return;
		} else if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) {
			g_mirror_kernel_dump(bp);
			return;
		}
		/* FALLTHROUGH */
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		return;
	}
	mtx_lock(&sc->sc_queue_mtx);
	if (bp->bio_to->error != 0) {
		mtx_unlock(&sc->sc_queue_mtx);
		g_io_deliver(bp, bp->bio_to->error);
		return;
	}
	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
	mtx_unlock(&sc->sc_queue_mtx);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	wakeup(sc);
}

/*
 * Return TRUE if the given request is colliding with an in-progress
 * synchronization request.
 */
static bool
g_mirror_sync_collision(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct bio *sbp;
	off_t rstart, rend, sstart, send;
	u_int i;

	if (sc->sc_sync.ds_ndisks == 0)
		return (false);
	rstart = bp->bio_offset;
	rend = bp->bio_offset + bp->bio_length;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING)
			continue;
		for (i = 0; i < g_mirror_syncreqs; i++) {
			sbp = disk->d_sync.ds_bios[i];
			if (sbp == NULL)
				continue;
			sstart = sbp->bio_offset;
			send = sbp->bio_offset + sbp->bio_length;
			if (rend > sstart && rstart < send)
				return (true);
		}
	}
	return (false);
}

/*
 * Return TRUE if the given sync request is colliding with an in-progress
 * regular request.
 */
static bool
g_mirror_regular_collision(struct g_mirror_softc *sc, struct bio *sbp)
{
	off_t rstart, rend, sstart, send;
	struct bio *bp;

	if (sc->sc_sync.ds_ndisks == 0)
		return (false);
	sstart = sbp->bio_offset;
	send = sbp->bio_offset + sbp->bio_length;
	TAILQ_FOREACH(bp, &sc->sc_inflight, bio_queue) {
		rstart = bp->bio_offset;
		rend = bp->bio_offset + bp->bio_length;
		if (rend > sstart && rstart < send)
			return (true);
	}
	return (false);
}

/*
 * Put the request onto the delayed queue.
 */
static void
g_mirror_regular_delay(struct g_mirror_softc *sc, struct bio *bp)
{

	G_MIRROR_LOGREQ(2, bp, "Delaying request.");
	TAILQ_INSERT_HEAD(&sc->sc_regular_delayed, bp, bio_queue);
}

/*
 * Put the synchronization request onto the delayed queue.
 */
static void
g_mirror_sync_delay(struct g_mirror_softc *sc, struct bio *bp)
{

	G_MIRROR_LOGREQ(2, bp, "Delaying synchronization request.");
	TAILQ_INSERT_TAIL(&sc->sc_sync_delayed, bp, bio_queue);
}

/*
 * Release delayed regular requests which no longer collide with
 * synchronization requests.
 */
static void
g_mirror_regular_release(struct g_mirror_softc *sc)
{
	struct bio *bp, *bp2;

	TAILQ_FOREACH_SAFE(bp, &sc->sc_regular_delayed, bio_queue, bp2) {
		if (g_mirror_sync_collision(sc, bp))
			continue;
		TAILQ_REMOVE(&sc->sc_regular_delayed, bp, bio_queue);
		G_MIRROR_LOGREQ(2, bp, "Releasing delayed request (%p).", bp);
		mtx_lock(&sc->sc_queue_mtx);
		TAILQ_INSERT_HEAD(&sc->sc_queue, bp, bio_queue);
		mtx_unlock(&sc->sc_queue_mtx);
	}
}

/*
 * Release delayed synchronization requests which no longer collide with
 * regular requests.
 */
static void
g_mirror_sync_release(struct g_mirror_softc *sc)
{
	struct bio *bp, *bp2;

	TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed, bio_queue, bp2) {
		if (g_mirror_regular_collision(sc, bp))
			continue;
		TAILQ_REMOVE(&sc->sc_sync_delayed, bp, bio_queue);
		G_MIRROR_LOGREQ(2, bp,
		    "Releasing delayed synchronization request.");
		g_io_request(bp, bp->bio_from);
	}
}

/*
 * Free a synchronization request and clear its slot in the array.
 */
static void
g_mirror_sync_request_free(struct g_mirror_disk *disk, struct bio *bp)
{
	int idx;

	if (disk != NULL && disk->d_sync.ds_bios != NULL) {
		idx = (int)(uintptr_t)bp->bio_caller1;
		KASSERT(disk->d_sync.ds_bios[idx] == bp,
		    ("unexpected sync BIO at %p:%d", disk, idx));
		disk->d_sync.ds_bios[idx] = NULL;
	}
	free(bp->bio_data, M_MIRROR);
	g_destroy_bio(bp);
}

/*
 * Handle synchronization requests.
 * Every synchronization request is a two-step process: first, a READ request
 * is sent to the active provider and then a WRITE request (with the read
 * data) to the provider being synchronized. When the WRITE is finished, a
 * new synchronization request is sent.
 */
static void
g_mirror_sync_request(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct g_mirror_disk_sync *sync;

	bp->bio_from->index--;
	sc = bp->bio_from->geom->softc;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
		g_mirror_sync_request_free(NULL, bp);
		sx_xlock(&sc->sc_lock);
		return;
	}

	/*
	 * Synchronization request.
	 */
	switch (bp->bio_cmd) {
	case BIO_READ:
	    {
		struct g_consumer *cp;

		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_read,
		    bp->bio_error);

		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_mirror_sync_request_free(disk, bp);
			return;
		}
		G_MIRROR_LOGREQ(3, bp,
		    "Synchronization request half-finished.");
		bp->bio_cmd = BIO_WRITE;
		bp->bio_cflags = 0;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		cp->index++;
		g_io_request(bp, cp);
		return;
	    }
	case BIO_WRITE:
	    {
		off_t offset;
		void *data;
		int i, idx;

		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_write,
		    bp->bio_error);

		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_mirror_sync_request_free(disk, bp);
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}
		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
		sync = &disk->d_sync;
		if (sync->ds_offset >= sc->sc_mediasize ||
		    sync->ds_consumer == NULL ||
		    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
			/* Don't send more synchronization requests. */
			sync->ds_inflight--;
			g_mirror_sync_request_free(disk, bp);
			if (sync->ds_inflight > 0)
				return;
			if (sync->ds_consumer == NULL ||
			    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
				return;
			}
			/* Disk up-to-date, activate it. */
			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}

		/* Send next synchronization request. */
		data = bp->bio_data;
		idx = (int)(uintptr_t)bp->bio_caller1;
		g_reset_bio(bp);
		bp->bio_cmd = BIO_READ;
		bp->bio_offset = sync->ds_offset;
		bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
		sync->ds_offset += bp->bio_length;
		bp->bio_done = g_mirror_sync_done;
		bp->bio_data = data;
		bp->bio_from = sync->ds_consumer;
		bp->bio_to = sc->sc_provider;
		bp->bio_caller1 = (void *)(uintptr_t)idx;
		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
		sync->ds_consumer->index++;
		/*
		 * Delay the request if it is colliding with a regular request.
		 */
		if (g_mirror_regular_collision(sc, bp))
			g_mirror_sync_delay(sc, bp);
		else
			g_io_request(bp, sync->ds_consumer);

		/* Release delayed requests if possible. */
		g_mirror_regular_release(sc);

		/* Find the smallest offset */
		offset = sc->sc_mediasize;
		for (i = 0; i < g_mirror_syncreqs; i++) {
			bp = sync->ds_bios[i];
			if (bp != NULL && bp->bio_offset < offset)
				offset = bp->bio_offset;
		}
		if (g_mirror_sync_period > 0 &&
		    time_uptime - sync->ds_update_ts > g_mirror_sync_period) {
			sync->ds_offset_done = offset;
			g_mirror_update_metadata(disk);
			sync->ds_update_ts = time_uptime;
		}
		return;
	    }
	default:
		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
		    bp->bio_cmd, sc->sc_name));
		break;
	}
}

static void
g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
			break;
	}
	if (disk == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENXIO;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	g_io_request(cbp, cp);
}

static void
g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	disk = g_mirror_get_disk(sc);
	if (disk == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENXIO;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	g_io_request(cbp, cp);
}

#define TRACK_SIZE  (1 * 1024 * 1024)
#define LOAD_SCALE	256
#define ABS(x)		(((x) >= 0) ? (x) : (-(x)))

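/*
 * The "load" balance algorithm below picks the component with the smallest
 * load, strongly preferring a disk whose last known head position matches the
 * request offset and mildly preferring one within TRACK_SIZE of it.  Loads
 * decay as an exponential moving average:
 * load = (index * LOAD_SCALE + load * 7) / 8.
 */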
static void
g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk, *dp;
	struct g_consumer *cp;
	struct bio *cbp;
	int prio, best;

	/* Find a disk with the smallest load. */
	disk = NULL;
	best = INT_MAX;
	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		prio = dp->load;
		/* If disk head is precisely in position - highly prefer it. */
		if (dp->d_last_offset == bp->bio_offset)
			prio -= 2 * LOAD_SCALE;
		else
		/* If disk head is close to position - prefer it. */
		if (ABS(dp->d_last_offset - bp->bio_offset) < TRACK_SIZE)
			prio -= 1 * LOAD_SCALE;
		if (prio <= best) {
			disk = dp;
			best = prio;
		}
	}
	KASSERT(disk != NULL, ("NULL disk for %s.", sc->sc_name));
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component buf structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	/* Remember last head position */
	disk->d_last_offset = bp->bio_offset + bp->bio_length;
	/* Update loads. */
	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
		dp->load = (dp->d_consumer->index * LOAD_SCALE +
		    dp->load * 7) / 8;
	}
	g_io_request(cbp, cp);
}

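/*
 * The "split" balance algorithm divides a large read into sector-aligned
 * slices, one per ACTIVE component; requests of sc_slice bytes or less fall
 * back to round-robin.
 */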
static void
g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
{
	struct bio_queue queue;
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;
	off_t left, mod, offset, slice;
	u_char *data;
	u_int ndisks;

	if (bp->bio_length <= sc->sc_slice) {
		g_mirror_request_round_robin(sc, bp);
		return;
	}
	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
	slice = bp->bio_length / ndisks;
	mod = slice % sc->sc_provider->sectorsize;
	if (mod != 0)
		slice += sc->sc_provider->sectorsize - mod;
	/*
	 * Allocate all bios before sending any request, so we can
	 * return ENOMEM in a nice and clean way.
	 */
	left = bp->bio_length;
	offset = bp->bio_offset;
	data = bp->bio_data;
	TAILQ_INIT(&queue);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		cbp = g_clone_bio(bp);
		if (cbp == NULL) {
			while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
				TAILQ_REMOVE(&queue, cbp, bio_queue);
				g_destroy_bio(cbp);
			}
			if (bp->bio_error == 0)
				bp->bio_error = ENOMEM;
			g_io_deliver(bp, bp->bio_error);
			return;
		}
		TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
		cbp->bio_done = g_mirror_done;
		cbp->bio_caller1 = disk;
		cbp->bio_to = disk->d_consumer->provider;
		cbp->bio_offset = offset;
		cbp->bio_data = data;
		cbp->bio_length = MIN(left, slice);
		left -= cbp->bio_length;
		if (left == 0)
			break;
		offset += cbp->bio_length;
		data += cbp->bio_length;
	}
	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
		TAILQ_REMOVE(&queue, cbp, bio_queue);
		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
		disk = cbp->bio_caller1;
		cbp->bio_caller1 = NULL;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		disk->d_consumer->index++;
		g_io_request(cbp, disk->d_consumer);
	}
}

static void
g_mirror_register_request(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_to->private;
	switch (bp->bio_cmd) {
	case BIO_READ:
		switch (sc->sc_balance) {
		case G_MIRROR_BALANCE_LOAD:
			g_mirror_request_load(sc, bp);
			break;
		case G_MIRROR_BALANCE_PREFER:
			g_mirror_request_prefer(sc, bp);
			break;
		case G_MIRROR_BALANCE_ROUND_ROBIN:
			g_mirror_request_round_robin(sc, bp);
			break;
		case G_MIRROR_BALANCE_SPLIT:
			g_mirror_request_split(sc, bp);
			break;
		}
		return;
	case BIO_WRITE:
	case BIO_DELETE:
	    {
		struct bio_queue queue;
		struct g_mirror_disk *disk;
		struct g_mirror_disk_sync *sync;
		struct g_consumer *cp;
		struct bio *cbp;

		/*
		 * Delay the request if it is colliding with a synchronization
		 * request.
		 */
		if (g_mirror_sync_collision(sc, bp)) {
			g_mirror_regular_delay(sc, bp);
			return;
		}

		if (sc->sc_idle)
			g_mirror_unidle(sc);
		else
			sc->sc_last_write = time_uptime;

		/*
		 * Bump syncid on first write.
		 */
		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) {
			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
			g_mirror_bump_syncid(sc);
		}

		/*
		 * Allocate all bios before sending any request, so we can
		 * return ENOMEM in a nice and clean way.
		 */
		TAILQ_INIT(&queue);
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			sync = &disk->d_sync;
			switch (disk->d_state) {
			case G_MIRROR_DISK_STATE_ACTIVE:
				break;
			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
				if (bp->bio_offset >= sync->ds_offset)
					continue;
				break;
			default:
				continue;
			}
			if (bp->bio_cmd == BIO_DELETE &&
			    (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE) == 0)
				continue;
			cbp = g_clone_bio(bp);
			if (cbp == NULL) {
				while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
					TAILQ_REMOVE(&queue, cbp, bio_queue);
					g_destroy_bio(cbp);
				}
				if (bp->bio_error == 0)
					bp->bio_error = ENOMEM;
				g_io_deliver(bp, bp->bio_error);
				return;
			}
			TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
			cbp->bio_done = g_mirror_done;
			cp = disk->d_consumer;
			cbp->bio_caller1 = cp;
			cbp->bio_to = cp->provider;
			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
			    ("Consumer %s not opened (r%dw%de%d).",
			    cp->provider->name, cp->acr, cp->acw, cp->ace));
		}
		if (TAILQ_EMPTY(&queue)) {
			g_io_deliver(bp, EOPNOTSUPP);
			return;
		}
		while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
			TAILQ_REMOVE(&queue, cbp, bio_queue);
			cp = cbp->bio_caller1;
			cbp->bio_caller1 = NULL;
			cp->index++;
			sc->sc_writes++;
			g_io_request(cbp, cp);
		}
		/*
		 * Put the request onto the inflight queue, so we can check
		 * whether new synchronization requests collide with it.
		 */
		TAILQ_INSERT_TAIL(&sc->sc_inflight, bp, bio_queue);
		return;
	    }
	default:
		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
		    bp->bio_cmd, sc->sc_name));
		break;
	}
}

static int
g_mirror_can_destroy(struct g_mirror_softc *sc)
{
	struct g_geom *gp;
	struct g_consumer *cp;

	g_topology_assert();
	gp = sc->sc_geom;
	if (gp->softc == NULL)
		return (1);
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_TASTING) != 0)
		return (0);
	LIST_FOREACH(cp, &gp->consumer, consumer) {
		if (g_mirror_is_busy(sc, cp))
			return (0);
	}
	gp = sc->sc_sync.ds_geom;
	LIST_FOREACH(cp, &gp->consumer, consumer) {
		if (g_mirror_is_busy(sc, cp))
			return (0);
	}
	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
	    sc->sc_name);
	return (1);
}

static int
g_mirror_try_destroy(struct g_mirror_softc *sc)
{

	if (sc->sc_rootmount != NULL) {
		G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
		    sc->sc_rootmount);
		root_mount_rel(sc->sc_rootmount);
		sc->sc_rootmount = NULL;
	}
	g_topology_lock();
	if (!g_mirror_can_destroy(sc)) {
		g_topology_unlock();
		return (0);
	}
	sc->sc_geom->softc = NULL;
	sc->sc_sync.ds_geom->softc = NULL;
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DRAIN) != 0) {
		g_topology_unlock();
		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
		    &sc->sc_worker);
		/* Unlock sc_lock here, as it can be destroyed after wakeup. */
		sx_xunlock(&sc->sc_lock);
		wakeup(&sc->sc_worker);
		sc->sc_worker = NULL;
	} else {
		g_topology_unlock();
		g_mirror_destroy_device(sc);
	}
	return (1);
}

/*
 * Worker thread.
 */
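/*
 * Each mirror device runs a single worker, which first handles state-change
 * events and then regular, synchronization and flush bios from the queue;
 * when the queue is empty, the sleep timeout is reused to periodically retry
 * marking the array clean.
 */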
static void
g_mirror_worker(void *arg)
{
	struct g_mirror_softc *sc;
	struct g_mirror_event *ep;
	struct bio *bp;
	int timeout;

	sc = arg;
	thread_lock(curthread);
	sched_prio(curthread, PRIBIO);
	thread_unlock(curthread);

	sx_xlock(&sc->sc_lock);
	for (;;) {
		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
		/*
		 * First take a look at events.
		 * It is important to handle events before any I/O requests.
		 */
		ep = g_mirror_event_first(sc);
		if (ep != NULL) {
			g_mirror_event_remove(sc, ep);
			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
				/* Update only device status. */
				G_MIRROR_DEBUG(3,
				    "Running event for device %s.",
				    sc->sc_name);
				ep->e_error = 0;
				g_mirror_update_device(sc, true);
			} else {
				/* Update disk status. */
				G_MIRROR_DEBUG(3, "Running event for disk %s.",
				     g_mirror_get_diskname(ep->e_disk));
				ep->e_error = g_mirror_update_disk(ep->e_disk,
				    ep->e_state);
				if (ep->e_error == 0)
					g_mirror_update_device(sc, false);
			}
			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
				KASSERT(ep->e_error == 0,
				    ("Error cannot be handled."));
				g_mirror_event_free(ep);
			} else {
				ep->e_flags |= G_MIRROR_EVENT_DONE;
				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
				    ep);
				mtx_lock(&sc->sc_events_mtx);
				wakeup(ep);
				mtx_unlock(&sc->sc_events_mtx);
			}
			if ((sc->sc_flags &
			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
				if (g_mirror_try_destroy(sc)) {
					curthread->td_pflags &= ~TDP_GEOM;
					G_MIRROR_DEBUG(1, "Thread exiting.");
					kproc_exit(0);
				}
			}
			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
			continue;
		}
1929		/*
1930		 * Check whether we can mark the array as CLEAN and, if we
1931		 * cannot, how many seconds we should wait before retrying.
1932		 */
1933		timeout = g_mirror_idle(sc, -1);
1934		/*
1935		 * Now I/O requests.
1936		 */
1937		/* Get first request from the queue. */
1938		mtx_lock(&sc->sc_queue_mtx);
1939		bp = TAILQ_FIRST(&sc->sc_queue);
1940		if (bp != NULL)
1941			TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue);
1942		else {
1943			if ((sc->sc_flags &
1944			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1945				mtx_unlock(&sc->sc_queue_mtx);
1946				if (g_mirror_try_destroy(sc)) {
1947					curthread->td_pflags &= ~TDP_GEOM;
1948					G_MIRROR_DEBUG(1, "Thread exiting.");
1949					kproc_exit(0);
1950				}
1951				mtx_lock(&sc->sc_queue_mtx);
1952				if (!TAILQ_EMPTY(&sc->sc_queue)) {
1953					mtx_unlock(&sc->sc_queue_mtx);
1954					continue;
1955				}
1956			}
1957			if (g_mirror_event_first(sc) != NULL)
1958				continue;
1959			sx_xunlock(&sc->sc_lock);
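			/*
			 * Sleep until new work arrives or the idle timeout,
			 * if any, expires.
			 */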
1960			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1",
1961			    timeout * hz);
1962			sx_xlock(&sc->sc_lock);
1963			G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
1964			continue;
1965		}
1966		mtx_unlock(&sc->sc_queue_mtx);
1967
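		/*
		 * Dispatch the bio: bios from the synchronization geom are
		 * synchronization reads; bios not addressed to our provider
		 * are completed requests to components (regular, or
		 * synchronization writes); the rest are new requests for the
		 * mirror provider.
		 */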
1968		if (bp->bio_from->geom == sc->sc_sync.ds_geom &&
1969		    (bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
1970			g_mirror_sync_request(bp);	/* READ */
1971		} else if (bp->bio_to != sc->sc_provider) {
1972			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0)
1973				g_mirror_regular_request(bp);
1974			else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
1975				g_mirror_sync_request(bp);	/* WRITE */
1976			else {
1977				KASSERT(0,
1978				    ("Invalid request cflags=0x%hx to=%s.",
1979				    bp->bio_cflags, bp->bio_to->name));
1980			}
1981		} else {
1982			g_mirror_register_request(bp);
1983		}
1984		G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__);
1985	}
1986}
1987
1988static void
1989g_mirror_update_idle(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
1990{
1991
1992	sx_assert(&sc->sc_lock, SX_LOCKED);
1993
1994	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
1995		return;
1996	if (!sc->sc_idle && (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
1997		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as dirty.",
1998		    g_mirror_get_diskname(disk), sc->sc_name);
1999		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2000	} else if (sc->sc_idle &&
2001	    (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2002		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as clean.",
2003		    g_mirror_get_diskname(disk), sc->sc_name);
2004		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2005	}
2006}
2007
2008static void
2009g_mirror_sync_start(struct g_mirror_disk *disk)
2010{
2011	struct g_mirror_softc *sc;
2012	struct g_consumer *cp;
2013	struct bio *bp;
2014	int error, i;
2015
2016	g_topology_assert_not();
2017	sc = disk->d_softc;
2018	sx_assert(&sc->sc_lock, SX_LOCKED);
2019
2020	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2021	    ("Disk %s is not marked for synchronization.",
2022	    g_mirror_get_diskname(disk)));
2023	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2024	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
2025	    sc->sc_state));
2026
2027	sx_xunlock(&sc->sc_lock);
2028	g_topology_lock();
2029	cp = g_new_consumer(sc->sc_sync.ds_geom);
2030	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
2031	error = g_attach(cp, sc->sc_provider);
2032	KASSERT(error == 0,
2033	    ("Cannot attach to %s (error=%d).", sc->sc_name, error));
2034	error = g_access(cp, 1, 0, 0);
2035	KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error));
2036	g_topology_unlock();
2037	sx_xlock(&sc->sc_lock);
2038
2039	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
2040	    g_mirror_get_diskname(disk));
2041	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) == 0)
2042		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2043	KASSERT(disk->d_sync.ds_consumer == NULL,
2044	    ("Sync consumer already exists (device=%s, disk=%s).",
2045	    sc->sc_name, g_mirror_get_diskname(disk)));
2046
2047	disk->d_sync.ds_consumer = cp;
2048	disk->d_sync.ds_consumer->private = disk;
2049	disk->d_sync.ds_consumer->index = 0;
2050
2051	/*
2052	 * Allocate memory for synchronization bios and initialize them.
2053	 */
2054	disk->d_sync.ds_bios = malloc(sizeof(struct bio *) * g_mirror_syncreqs,
2055	    M_MIRROR, M_WAITOK);
2056	for (i = 0; i < g_mirror_syncreqs; i++) {
2057		bp = g_alloc_bio();
2058		disk->d_sync.ds_bios[i] = bp;
2059		bp->bio_parent = NULL;
2060		bp->bio_cmd = BIO_READ;
2061		bp->bio_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK);
2062		bp->bio_cflags = 0;
2063		bp->bio_offset = disk->d_sync.ds_offset;
2064		bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
2065		disk->d_sync.ds_offset += bp->bio_length;
2066		bp->bio_done = g_mirror_sync_done;
2067		bp->bio_from = disk->d_sync.ds_consumer;
2068		bp->bio_to = sc->sc_provider;
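		/* Record this bio's slot in the ds_bios array. */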
2069		bp->bio_caller1 = (void *)(uintptr_t)i;
2070	}
2071
2072	/* Increase the number of disks in SYNCHRONIZING state. */
2073	sc->sc_sync.ds_ndisks++;
2074	/* Set the number of in-flight synchronization requests. */
2075	disk->d_sync.ds_inflight = g_mirror_syncreqs;
2076
2077	/*
2078	 * Fire off first synchronization requests.
2079	 */
2080	for (i = 0; i < g_mirror_syncreqs; i++) {
2081		bp = disk->d_sync.ds_bios[i];
2082		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
2083		disk->d_sync.ds_consumer->index++;
2084		/*
2085		 * Delay the request if it collides with a regular request.
2086		 */
2087		if (g_mirror_regular_collision(sc, bp))
2088			g_mirror_sync_delay(sc, bp);
2089		else
2090			g_io_request(bp, disk->d_sync.ds_consumer);
2091	}
2092}
2093
2094/*
2095 * Stop synchronization process.
2096 * type: 0 - synchronization finished
2097 *       1 - synchronization stopped
2098 */
2099static void
2100g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
2101{
2102	struct g_mirror_softc *sc;
2103	struct g_consumer *cp;
2104
2105	g_topology_assert_not();
2106	sc = disk->d_softc;
2107	sx_assert(&sc->sc_lock, SX_LOCKED);
2108
2109	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2110	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2111	    g_mirror_disk_state2str(disk->d_state)));
2112	if (disk->d_sync.ds_consumer == NULL)
2113		return;
2114
2115	if (type == 0) {
2116		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
2117		    sc->sc_name, g_mirror_get_diskname(disk));
2118	} else /* if (type == 1) */ {
2119		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
2120		    sc->sc_name, g_mirror_get_diskname(disk));
2121	}
2122	g_mirror_regular_release(sc);
2123	free(disk->d_sync.ds_bios, M_MIRROR);
2124	disk->d_sync.ds_bios = NULL;
2125	cp = disk->d_sync.ds_consumer;
2126	disk->d_sync.ds_consumer = NULL;
2127	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2128	sc->sc_sync.ds_ndisks--;
2129	sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
2130	g_topology_lock();
2131	g_mirror_kill_consumer(sc, cp);
2132	g_topology_unlock();
2133	sx_xlock(&sc->sc_lock);
2134}
2135
2136static void
2137g_mirror_launch_provider(struct g_mirror_softc *sc)
2138{
2139	struct g_mirror_disk *disk;
2140	struct g_provider *pp, *dp;
2141
2142	sx_assert(&sc->sc_lock, SX_LOCKED);
2143
2144	g_topology_lock();
2145	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
2146	pp->flags |= G_PF_DIRECT_RECEIVE;
2147	pp->mediasize = sc->sc_mediasize;
2148	pp->sectorsize = sc->sc_sectorsize;
2149	pp->stripesize = 0;
2150	pp->stripeoffset = 0;
2151
2152	/* Splitting of unmapped BIOs could work but is not implemented yet. */
2153	if (sc->sc_balance != G_MIRROR_BALANCE_SPLIT)
2154		pp->flags |= G_PF_ACCEPT_UNMAPPED;
2155
2156	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2157		if (disk->d_consumer && disk->d_consumer->provider) {
2158			dp = disk->d_consumer->provider;
2159			if (dp->stripesize > pp->stripesize) {
2160				pp->stripesize = dp->stripesize;
2161				pp->stripeoffset = dp->stripeoffset;
2162			}
2163			/* A provider underneath us doesn't support unmapped I/O. */
2164			if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) {
2165				G_MIRROR_DEBUG(0, "Cancelling unmapped "
2166				    "because of %s.", dp->name);
2167				pp->flags &= ~G_PF_ACCEPT_UNMAPPED;
2168			}
2169		}
2170	}
2171	pp->private = sc;
2172	sc->sc_refcnt++;
2173	sc->sc_provider = pp;
2174	g_error_provider(pp, 0);
2175	g_topology_unlock();
2176	G_MIRROR_DEBUG(0, "Device %s launched (%u/%u).", pp->name,
2177	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE), sc->sc_ndisks);
2178	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2179		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2180			g_mirror_sync_start(disk);
2181	}
2182}
2183
2184static void
2185g_mirror_destroy_provider(struct g_mirror_softc *sc)
2186{
2187	struct g_mirror_disk *disk;
2188	struct bio *bp;
2189
2190	g_topology_assert_not();
2191	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
2192	    sc->sc_name));
2193
2194	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2195		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2196			g_mirror_sync_stop(disk, 1);
2197	}
2198
2199	g_topology_lock();
2200	g_error_provider(sc->sc_provider, ENXIO);
2201	mtx_lock(&sc->sc_queue_mtx);
2202	while ((bp = TAILQ_FIRST(&sc->sc_queue)) != NULL) {
2203		TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue);
2204		/*
2205		 * Abort any pending I/O that wasn't generated by us.
2206		 * Synchronization requests and requests destined for individual
2207		 * mirror components can be destroyed immediately.
2208		 */
2209		if (bp->bio_to == sc->sc_provider &&
2210		    bp->bio_from->geom != sc->sc_sync.ds_geom) {
2211			g_io_deliver(bp, ENXIO);
2212		} else {
2213			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
2214				free(bp->bio_data, M_MIRROR);
2215			g_destroy_bio(bp);
2216		}
2217	}
2218	mtx_unlock(&sc->sc_queue_mtx);
2219	g_wither_provider(sc->sc_provider, ENXIO);
2220	sc->sc_provider = NULL;
2221	G_MIRROR_DEBUG(0, "Device %s: provider destroyed.", sc->sc_name);
2222	g_topology_unlock();
2223}
2224
2225static void
2226g_mirror_go(void *arg)
2227{
2228	struct g_mirror_softc *sc;
2229
2230	sc = arg;
2231	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
2232	g_mirror_event_send(sc, 0,
2233	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
2234}
2235
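/*
 * Choose the initial state of a connecting disk by comparing its syncid
 * with the device's: an equal syncid yields ACTIVE, SYNCHRONIZING, or STALE
 * depending on the dirty flags and the autosync settings; a smaller syncid
 * means the disk is out of date and must be synchronized or marked stale; a
 * larger syncid means the running mirror itself is stale, so the disk is
 * destroyed rather than connected.
 */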
2236static u_int
2237g_mirror_determine_state(struct g_mirror_disk *disk)
2238{
2239	struct g_mirror_softc *sc;
2240	u_int state;
2241
2242	sc = disk->d_softc;
2243	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
2244		if ((disk->d_flags &
2245		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0 &&
2246		    (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 ||
2247		     (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0)) {
2248			/* Disk does not need synchronization. */
2249			state = G_MIRROR_DISK_STATE_ACTIVE;
2250		} else {
2251			if ((sc->sc_flags &
2252			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2253			    (disk->d_flags &
2254			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2255				/*
2256				 * We can start synchronization from
2257				 * the stored offset.
2258				 */
2259				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2260			} else {
2261				state = G_MIRROR_DISK_STATE_STALE;
2262			}
2263		}
2264	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
2265		/*
2266		 * Reset all synchronization data for this disk: even if it
2267		 * was synchronized, it was synchronized against disks with a
2268		 * different syncid.
2269		 */
2270		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2271		disk->d_sync.ds_offset = 0;
2272		disk->d_sync.ds_offset_done = 0;
2273		disk->d_sync.ds_syncid = sc->sc_syncid;
2274		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2275		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2276			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2277		} else {
2278			state = G_MIRROR_DISK_STATE_STALE;
2279		}
2280	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
2281		/*
2282		 * Not good, NOT GOOD!
2283		 * It means that the mirror was started on stale disks
2284		 * and a fresher disk has just arrived.
2285		 * If there were writes, the mirror is broken, sorry.
2286		 * The best choice here is to leave this disk untouched
2287		 * and inform the user loudly.
2288		 */
2289		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
2290		    "disk (%s) arrived! It will not be connected to the "
2291		    "running device.", sc->sc_name,
2292		    g_mirror_get_diskname(disk));
2293		g_mirror_destroy_disk(disk);
2294		state = G_MIRROR_DISK_STATE_NONE;
2295		/* Return immediately, because disk was destroyed. */
2296		/* Return immediately, because the disk was destroyed. */
2297	}
2298	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
2299	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
2300	return (state);
2301}
2302
2303/*
2304 * Update device state.
2305 */
2306static void
2307g_mirror_update_device(struct g_mirror_softc *sc, bool force)
2308{
2309	struct g_mirror_disk *disk;
2310	u_int state;
2311
2312	sx_assert(&sc->sc_lock, SX_XLOCKED);
2313
2314	switch (sc->sc_state) {
2315	case G_MIRROR_DEVICE_STATE_STARTING:
2316	    {
2317		struct g_mirror_disk *pdisk, *tdisk;
2318		u_int dirty, ndisks, genid, syncid;
2319		bool broken;
2320
2321		KASSERT(sc->sc_provider == NULL,
2322		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
2323		/*
2324		 * Are we ready? We are, if all disks are connected or
2325		 * if we have any disks and 'force' is true.
2326		 */
2327		ndisks = g_mirror_ndisks(sc, -1);
2328		if (sc->sc_ndisks == ndisks || (force && ndisks > 0)) {
2329			;
2330		} else if (ndisks == 0) {
2331			/*
2332			 * Disks went down during the starting phase, so
2333			 * destroy the device.
2334			 */
2335			callout_drain(&sc->sc_callout);
2336			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2337			G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
2338			    sc->sc_rootmount);
2339			root_mount_rel(sc->sc_rootmount);
2340			sc->sc_rootmount = NULL;
2341			return;
2342		} else {
2343			return;
2344		}
2345
2346		/*
2347		 * Activate all disks with the biggest syncid.
2348		 */
2349		if (force) {
2350			/*
2351			 * If 'force' is true, we have been called due to a
2352			 * timeout, so don't bother canceling the timeout.
2353			 */
2354			ndisks = 0;
2355			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2356				if ((disk->d_flags &
2357				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
2358					ndisks++;
2359				}
2360			}
2361			if (ndisks == 0) {
2362				/* No valid disks found, destroy device. */
2363				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2364				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2365				    __LINE__, sc->sc_rootmount);
2366				root_mount_rel(sc->sc_rootmount);
2367				sc->sc_rootmount = NULL;
2368				return;
2369			}
2370		} else {
2371			/* Cancel timeout. */
2372			callout_drain(&sc->sc_callout);
2373		}
2374
2375		/*
2376		 * Find the biggest genid.
2377		 */
2378		genid = 0;
2379		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2380			if (disk->d_genid > genid)
2381				genid = disk->d_genid;
2382		}
2383		sc->sc_genid = genid;
2384		/*
2385		 * Remove all disks without the biggest genid.
2386		 */
2387		broken = false;
2388		LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
2389			if (disk->d_genid < genid) {
2390				G_MIRROR_DEBUG(0,
2391				    "Component %s (device %s) broken, skipping.",
2392				    g_mirror_get_diskname(disk), sc->sc_name);
2393				g_mirror_destroy_disk(disk);
2394				/*
2395				 * Bump the syncid in case we discover a healthy
2396				 * replacement disk after starting the mirror.
2397				 */
2398				broken = true;
2399			}
2400		}
2401
2402		/*
2403		 * Find the biggest syncid.
2404		 */
2405		syncid = 0;
2406		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2407			if (disk->d_sync.ds_syncid > syncid)
2408				syncid = disk->d_sync.ds_syncid;
2409		}
2410
2411		/*
2412		 * Here we need to look for dirty disks: if all disks with
2413		 * the biggest syncid are dirty, we have to choose the one
2414		 * with the biggest priority and rebuild the rest.
2415		 */
2416		/*
2417		 * Find the number of dirty disks with the biggest syncid.
2418		 * Find the number of disks with the biggest syncid.
2419		 * While here, find a disk with the biggest priority.
2420		 */
2421		dirty = ndisks = 0;
2422		pdisk = NULL;
2423		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2424			if (disk->d_sync.ds_syncid != syncid)
2425				continue;
2426			if ((disk->d_flags &
2427			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2428				continue;
2429			}
2430			ndisks++;
2431			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2432				dirty++;
2433				if (pdisk == NULL ||
2434				    pdisk->d_priority < disk->d_priority) {
2435					pdisk = disk;
2436				}
2437			}
2438		}
2439		if (dirty == 0) {
2440			/* No dirty disks at all, great. */
2441		} else if (dirty == ndisks) {
2442			/*
2443			 * Force synchronization for all dirty disks except one
2444			 * with the biggest priority.
2445			 */
2446			KASSERT(pdisk != NULL, ("pdisk == NULL"));
2447			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
2448			    "master disk for synchronization.",
2449			    g_mirror_get_diskname(pdisk), sc->sc_name);
2450			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2451				if (disk->d_sync.ds_syncid != syncid)
2452					continue;
2453				if ((disk->d_flags &
2454				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2455					continue;
2456				}
2457				KASSERT((disk->d_flags &
2458				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
2459				    ("Disk %s isn't marked as dirty.",
2460				    g_mirror_get_diskname(disk)));
2461				/* Skip the disk with the biggest priority. */
2462				if (disk == pdisk)
2463					continue;
2464				disk->d_sync.ds_syncid = 0;
2465			}
2466		} else if (dirty < ndisks) {
2467			/*
2468			 * Force synchronization for all dirty disks.
2469			 * We have some non-dirty disks.
2470			 */
2471			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2472				if (disk->d_sync.ds_syncid != syncid)
2473					continue;
2474				if ((disk->d_flags &
2475				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2476					continue;
2477				}
2478				if ((disk->d_flags &
2479				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
2480					continue;
2481				}
2482				disk->d_sync.ds_syncid = 0;
2483			}
2484		}
2485
2486		/* Reset hint. */
2487		sc->sc_hint = NULL;
2488		sc->sc_syncid = syncid;
2489		if (force || broken) {
2490			/* Remember to bump syncid on first write. */
2491			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2492		}
2493		state = G_MIRROR_DEVICE_STATE_RUNNING;
2494		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
2495		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
2496		    g_mirror_device_state2str(state));
2497		sc->sc_state = state;
2498		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2499			state = g_mirror_determine_state(disk);
2500			g_mirror_event_send(disk, state,
2501			    G_MIRROR_EVENT_DONTWAIT);
2502			if (state == G_MIRROR_DISK_STATE_STALE)
2503				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2504		}
2505		break;
2506	    }
2507	case G_MIRROR_DEVICE_STATE_RUNNING:
2508		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
2509		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2510			/*
2511			 * No usable disks, so destroy the device.
2512			 */
2513			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2514			break;
2515		} else if (g_mirror_ndisks(sc,
2516		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
2517		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2518			/*
2519			 * We have active disks, so launch the provider if it
2520			 * doesn't exist.
2521			 */
2522			if (sc->sc_provider == NULL)
2523				g_mirror_launch_provider(sc);
2524			if (sc->sc_rootmount != NULL) {
2525				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2526				    __LINE__, sc->sc_rootmount);
2527				root_mount_rel(sc->sc_rootmount);
2528				sc->sc_rootmount = NULL;
2529			}
2530		}
2531		/*
2532		 * Genid should be bumped immediately, so do it here.
2533		 */
2534		if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID) != 0) {
2535			sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID;
2536			g_mirror_bump_genid(sc);
2537		}
2538		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID_NOW) != 0) {
2539			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID_NOW;
2540			g_mirror_bump_syncid(sc);
2541		}
2542		break;
2543	default:
2544		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
2545		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
2546		break;
2547	}
2548}
2549
2550/*
2551 * Update disk state and device state if needed.
2552 */
2553#define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
2554	"Disk %s state changed from %s to %s (device %s).",		\
2555	g_mirror_get_diskname(disk),					\
2556	g_mirror_disk_state2str(disk->d_state),				\
2557	g_mirror_disk_state2str(state), sc->sc_name)
2558static int
2559g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
2560{
2561	struct g_mirror_softc *sc;
2562
2563	sc = disk->d_softc;
2564	sx_assert(&sc->sc_lock, SX_XLOCKED);
2565
2566again:
2567	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
2568	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
2569	    g_mirror_disk_state2str(state));
2570	switch (state) {
2571	case G_MIRROR_DISK_STATE_NEW:
2572		/*
2573		 * Possible scenarios:
2574		 * 1. A new disk arrives.
2575		 */
2576		/* Previous state should be NONE. */
2577		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
2578		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2579		    g_mirror_disk_state2str(disk->d_state)));
2580		DISK_STATE_CHANGED();
2581
2582		disk->d_state = state;
2583		if (LIST_EMPTY(&sc->sc_disks))
2584			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
2585		else {
2586			struct g_mirror_disk *dp;
2587
2588			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
2589				if (disk->d_priority >= dp->d_priority) {
2590					LIST_INSERT_BEFORE(dp, disk, d_next);
2591					dp = NULL;
2592					break;
2593				}
2594				if (LIST_NEXT(dp, d_next) == NULL)
2595					break;
2596			}
2597			if (dp != NULL)
2598				LIST_INSERT_AFTER(dp, disk, d_next);
2599		}
2600		G_MIRROR_DEBUG(1, "Device %s: provider %s detected.",
2601		    sc->sc_name, g_mirror_get_diskname(disk));
2602		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2603			break;
2604		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2605		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2606		    g_mirror_device_state2str(sc->sc_state),
2607		    g_mirror_get_diskname(disk),
2608		    g_mirror_disk_state2str(disk->d_state)));
2609		state = g_mirror_determine_state(disk);
2610		if (state != G_MIRROR_DISK_STATE_NONE)
2611			goto again;
2612		break;
2613	case G_MIRROR_DISK_STATE_ACTIVE:
2614		/*
2615		 * Possible scenarios:
2616		 * 1. New disk does not need synchronization.
2617		 * 2. Synchronization process finished successfully.
2618		 */
2619		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2620		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2621		    g_mirror_device_state2str(sc->sc_state),
2622		    g_mirror_get_diskname(disk),
2623		    g_mirror_disk_state2str(disk->d_state)));
2624		/* Previous state should be NEW or SYNCHRONIZING. */
2625		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
2626		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2627		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2628		    g_mirror_disk_state2str(disk->d_state)));
2629		DISK_STATE_CHANGED();
2630
2631		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2632			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2633			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
2634			g_mirror_sync_stop(disk, 0);
2635		}
2636		disk->d_state = state;
2637		disk->d_sync.ds_offset = 0;
2638		disk->d_sync.ds_offset_done = 0;
2639		g_mirror_update_idle(sc, disk);
2640		g_mirror_update_metadata(disk);
2641		G_MIRROR_DEBUG(1, "Device %s: provider %s activated.",
2642		    sc->sc_name, g_mirror_get_diskname(disk));
2643		break;
2644	case G_MIRROR_DISK_STATE_STALE:
2645		/*
2646		 * Possible scenarios:
2647		 * 1. Stale disk was connected.
2648		 */
2649		/* Previous state should be NEW. */
2650		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2651		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2652		    g_mirror_disk_state2str(disk->d_state)));
2653		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2654		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2655		    g_mirror_device_state2str(sc->sc_state),
2656		    g_mirror_get_diskname(disk),
2657		    g_mirror_disk_state2str(disk->d_state)));
2658		/*
2659		 * STALE state is only possible if the device is marked
2660		 * NOAUTOSYNC.
2661		 */
2662		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
2663		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2664		    g_mirror_device_state2str(sc->sc_state),
2665		    g_mirror_get_diskname(disk),
2666		    g_mirror_disk_state2str(disk->d_state)));
2667		DISK_STATE_CHANGED();
2668
2669		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2670		disk->d_state = state;
2671		g_mirror_update_metadata(disk);
2672		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
2673		    sc->sc_name, g_mirror_get_diskname(disk));
2674		break;
2675	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
2676		/*
2677		 * Possible scenarios:
2678		 * 1. Disk which needs synchronization was connected.
2679		 */
2680		/* Previous state should be NEW. */
2681		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2682		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2683		    g_mirror_disk_state2str(disk->d_state)));
2684		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2685		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2686		    g_mirror_device_state2str(sc->sc_state),
2687		    g_mirror_get_diskname(disk),
2688		    g_mirror_disk_state2str(disk->d_state)));
2689		DISK_STATE_CHANGED();
2690
2691		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2692			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2693		disk->d_state = state;
2694		if (sc->sc_provider != NULL) {
2695			g_mirror_sync_start(disk);
2696			g_mirror_update_metadata(disk);
2697		}
2698		break;
2699	case G_MIRROR_DISK_STATE_DISCONNECTED:
2700		/*
2701		 * Possible scenarios:
2702		 * 1. Device wasn't running yet, but a disk disappeared.
2703		 * 2. Disk was active and disappeared.
2704		 * 3. Disk disappeared during the synchronization process.
2705		 */
2706		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
2707			/*
2708			 * Previous state should be ACTIVE, STALE or
2709			 * SYNCHRONIZING.
2710			 */
2711			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
2712			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
2713			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2714			    ("Wrong disk state (%s, %s).",
2715			    g_mirror_get_diskname(disk),
2716			    g_mirror_disk_state2str(disk->d_state)));
2717		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
2718			/* Previous state should be NEW. */
2719			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2720			    ("Wrong disk state (%s, %s).",
2721			    g_mirror_get_diskname(disk),
2722			    g_mirror_disk_state2str(disk->d_state)));
2723			/*
2724			 * Cancel the pending syncid bump if the disk
2725			 * disappeared in the STARTING state.
2726			 */
2727			if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0)
2728				sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
2729#ifdef	INVARIANTS
2730		} else {
2731			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
2732			    sc->sc_name,
2733			    g_mirror_device_state2str(sc->sc_state),
2734			    g_mirror_get_diskname(disk),
2735			    g_mirror_disk_state2str(disk->d_state)));
2736#endif
2737		}
2738		DISK_STATE_CHANGED();
2739		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
2740		    sc->sc_name, g_mirror_get_diskname(disk));
2741
2742		g_mirror_destroy_disk(disk);
2743		break;
2744	case G_MIRROR_DISK_STATE_DESTROY:
2745	    {
2746		int error;
2747
2748		error = g_mirror_clear_metadata(disk);
2749		if (error != 0) {
2750			G_MIRROR_DEBUG(0,
2751			    "Device %s: failed to clear metadata on %s: %d.",
2752			    sc->sc_name, g_mirror_get_diskname(disk), error);
2753			break;
2754		}
2755		DISK_STATE_CHANGED();
2756		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
2757		    sc->sc_name, g_mirror_get_diskname(disk));
2758
2759		g_mirror_destroy_disk(disk);
2760		sc->sc_ndisks--;
2761		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2762			g_mirror_update_metadata(disk);
2763		}
2764		break;
2765	    }
2766	default:
2767		KASSERT(1 == 0, ("Unknown state (%u).", state));
2768		break;
2769	}
2770	return (0);
2771}
2772#undef	DISK_STATE_CHANGED
2773
2774int
2775g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
2776{
2777	struct g_provider *pp;
2778	u_char *buf;
2779	int error;
2780
2781	g_topology_assert();
2782
2783	error = g_access(cp, 1, 0, 0);
2784	if (error != 0)
2785		return (error);
2786	pp = cp->provider;
2787	g_topology_unlock();
2788	/* Metadata is stored in the last sector. */
2789	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
2790	    &error);
2791	g_topology_lock();
2792	g_access(cp, -1, 0, 0);
2793	if (buf == NULL) {
2794		G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).",
2795		    cp->provider->name, error);
2796		return (error);
2797	}
2798
2799	/* Decode metadata. */
2800	error = mirror_metadata_decode(buf, md);
2801	g_free(buf);
2802	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
2803		return (EINVAL);
2804	if (md->md_version > G_MIRROR_VERSION) {
2805		G_MIRROR_DEBUG(0,
2806		    "Kernel module is too old to handle metadata from %s.",
2807		    cp->provider->name);
2808		return (EINVAL);
2809	}
2810	if (error != 0) {
2811		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
2812		    cp->provider->name);
2813		return (error);
2814	}
2815
2816	return (0);
2817}
2818
2819static int
2820g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2821    struct g_mirror_metadata *md)
2822{
2823
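	/*
	 * Verify that the component's metadata is consistent with the
	 * configuration of the running device before accepting it.
	 */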
2824	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2825		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2826		    pp->name, md->md_did);
2827		return (EEXIST);
2828	}
2829	if (md->md_all != sc->sc_ndisks) {
2830		G_MIRROR_DEBUG(1,
2831		    "Invalid '%s' field on disk %s (device %s), skipping.",
2832		    "md_all", pp->name, sc->sc_name);
2833		return (EINVAL);
2834	}
2835	if (md->md_slice != sc->sc_slice) {
2836		G_MIRROR_DEBUG(1,
2837		    "Invalid '%s' field on disk %s (device %s), skipping.",
2838		    "md_slice", pp->name, sc->sc_name);
2839		return (EINVAL);
2840	}
2841	if (md->md_balance != sc->sc_balance) {
2842		G_MIRROR_DEBUG(1,
2843		    "Invalid '%s' field on disk %s (device %s), skipping.",
2844		    "md_balance", pp->name, sc->sc_name);
2845		return (EINVAL);
2846	}
2847#if 0
2848	if (md->md_mediasize != sc->sc_mediasize) {
2849		G_MIRROR_DEBUG(1,
2850		    "Invalid '%s' field on disk %s (device %s), skipping.",
2851		    "md_mediasize", pp->name, sc->sc_name);
2852		return (EINVAL);
2853	}
2854#endif
2855	if (sc->sc_mediasize > pp->mediasize) {
2856		G_MIRROR_DEBUG(1,
2857		    "Invalid size of disk %s (device %s), skipping.", pp->name,
2858		    sc->sc_name);
2859		return (EINVAL);
2860	}
2861	if (md->md_sectorsize != sc->sc_sectorsize) {
2862		G_MIRROR_DEBUG(1,
2863		    "Invalid '%s' field on disk %s (device %s), skipping.",
2864		    "md_sectorsize", pp->name, sc->sc_name);
2865		return (EINVAL);
2866	}
2867	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2868		G_MIRROR_DEBUG(1,
2869		    "Invalid sector size of disk %s (device %s), skipping.",
2870		    pp->name, sc->sc_name);
2871		return (EINVAL);
2872	}
2873	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
2874		G_MIRROR_DEBUG(1,
2875		    "Invalid device flags on disk %s (device %s), skipping.",
2876		    pp->name, sc->sc_name);
2877		return (EINVAL);
2878	}
2879	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
2880		G_MIRROR_DEBUG(1,
2881		    "Invalid disk flags on disk %s (device %s), skipping.",
2882		    pp->name, sc->sc_name);
2883		return (EINVAL);
2884	}
2885	return (0);
2886}
2887
2888int
2889g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
2890    struct g_mirror_metadata *md)
2891{
2892	struct g_mirror_disk *disk;
2893	int error;
2894
2895	g_topology_assert_not();
2896	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
2897
2898	error = g_mirror_check_metadata(sc, pp, md);
2899	if (error != 0)
2900		return (error);
2901	if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING &&
2902	    md->md_genid < sc->sc_genid) {
2903		G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.",
2904		    pp->name, sc->sc_name);
2905		return (EINVAL);
2906	}
2907	disk = g_mirror_init_disk(sc, pp, md, &error);
2908	if (disk == NULL)
2909		return (error);
2910	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
2911	    G_MIRROR_EVENT_WAIT);
2912	if (error != 0)
2913		return (error);
2914	if (md->md_version < G_MIRROR_VERSION) {
2915		G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).",
2916		    pp->name, md->md_version, G_MIRROR_VERSION);
2917		g_mirror_update_metadata(disk);
2918	}
2919	return (0);
2920}
2921
2922static void
2923g_mirror_destroy_delayed(void *arg, int flag)
2924{
2925	struct g_mirror_softc *sc;
2926	int error;
2927
2928	if (flag == EV_CANCEL) {
2929		G_MIRROR_DEBUG(1, "Destruction canceled.");
2930		return;
2931	}
2932	sc = arg;
2933	g_topology_unlock();
2934	sx_xlock(&sc->sc_lock);
2935	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) == 0,
2936	    ("DESTROY flag set on %s.", sc->sc_name));
2937	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0,
2938	    ("CLOSEWAIT flag not set on %s.", sc->sc_name));
2939	G_MIRROR_DEBUG(1, "Destroying %s (delayed).", sc->sc_name);
2940	error = g_mirror_destroy(sc, G_MIRROR_DESTROY_SOFT);
2941	if (error != 0) {
2942		G_MIRROR_DEBUG(0, "Cannot destroy %s (error=%d).",
2943		    sc->sc_name, error);
2944		sx_xunlock(&sc->sc_lock);
2945	}
2946	g_topology_lock();
2947}
2948
2949static int
2950g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
2951{
2952	struct g_mirror_softc *sc;
2953	int error = 0;
2954
2955	g_topology_assert();
2956	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
2957	    acw, ace);
2958
2959	sc = pp->private;
2960	KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name));
2961
2962	g_topology_unlock();
2963	sx_xlock(&sc->sc_lock);
2964	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0 ||
2965	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 ||
2966	    LIST_EMPTY(&sc->sc_disks)) {
2967		if (acr > 0 || acw > 0 || ace > 0)
2968			error = ENXIO;
2969		goto end;
2970	}
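	/*
	 * Track the net number of opens; when the write count drops to
	 * zero, idle the mirror so its components can be marked clean.
	 */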
2971	sc->sc_provider_open += acr + acw + ace;
2972	if (pp->acw + acw == 0)
2973		g_mirror_idle(sc, 0);
2974	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 &&
2975	    sc->sc_provider_open == 0)
2976		g_post_event(g_mirror_destroy_delayed, sc, M_WAITOK, sc, NULL);
2977end:
2978	sx_xunlock(&sc->sc_lock);
2979	g_topology_lock();
2980	return (error);
2981}
2982
2983struct g_geom *
2984g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md,
2985    u_int type)
2986{
2987	struct g_mirror_softc *sc;
2988	struct g_geom *gp;
2989	int error, timeout;
2990
2991	g_topology_assert();
2992	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
2993	    md->md_mid);
2994
2995	/* One disk is the minimum. */
2996	if (md->md_all < 1)
2997		return (NULL);
2998	/*
2999	 * Action geom.
3000	 */
3001	gp = g_new_geomf(mp, "%s", md->md_name);
3002	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
3003	gp->start = g_mirror_start;
3004	gp->orphan = g_mirror_orphan;
3005	gp->access = g_mirror_access;
3006	gp->dumpconf = g_mirror_dumpconf;
3007
3008	sc->sc_type = type;
3009	sc->sc_id = md->md_mid;
3010	sc->sc_slice = md->md_slice;
3011	sc->sc_balance = md->md_balance;
3012	sc->sc_mediasize = md->md_mediasize;
3013	sc->sc_sectorsize = md->md_sectorsize;
3014	sc->sc_ndisks = md->md_all;
3015	sc->sc_flags = md->md_mflags;
3016	sc->sc_bump_id = 0;
3017	sc->sc_idle = 1;
3018	sc->sc_last_write = time_uptime;
3019	sc->sc_writes = 0;
3020	sc->sc_refcnt = 1;
3021	sx_init(&sc->sc_lock, "gmirror:lock");
3022	TAILQ_INIT(&sc->sc_queue);
3023	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
3024	TAILQ_INIT(&sc->sc_regular_delayed);
3025	TAILQ_INIT(&sc->sc_inflight);
3026	TAILQ_INIT(&sc->sc_sync_delayed);
3027	LIST_INIT(&sc->sc_disks);
3028	TAILQ_INIT(&sc->sc_events);
3029	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
3030	callout_init(&sc->sc_callout, 1);
3031	mtx_init(&sc->sc_done_mtx, "gmirror:done", NULL, MTX_DEF);
3032	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
3033	gp->softc = sc;
3034	sc->sc_geom = gp;
3035	sc->sc_provider = NULL;
3036	sc->sc_provider_open = 0;
3037	/*
3038	 * Synchronization geom.
3039	 */
3040	gp = g_new_geomf(mp, "%s.sync", md->md_name);
3041	gp->softc = sc;
3042	gp->orphan = g_mirror_orphan;
3043	sc->sc_sync.ds_geom = gp;
3044	sc->sc_sync.ds_ndisks = 0;
3045	error = kproc_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
3046	    "g_mirror %s", md->md_name);
3047	if (error != 0) {
3048		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
3049		    sc->sc_name);
3050		g_destroy_geom(sc->sc_sync.ds_geom);
3051		g_destroy_geom(sc->sc_geom);
3052		g_mirror_free_device(sc);
3053		return (NULL);
3054	}
3055
3056	G_MIRROR_DEBUG(1, "Device %s created (%u components, id=%u).",
3057	    sc->sc_name, sc->sc_ndisks, sc->sc_id);
3058
3059	sc->sc_rootmount = root_mount_hold("GMIRROR");
3060	G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount);
3061	/*
3062	 * Run timeout.
3063	 */
3064	timeout = g_mirror_timeout * hz;
3065	callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
3066	return (sc->sc_geom);
3067}
3068
3069int
3070g_mirror_destroy(struct g_mirror_softc *sc, int how)
3071{
3072	struct g_mirror_disk *disk;
3073
3074	g_topology_assert_not();
3075	sx_assert(&sc->sc_lock, SX_XLOCKED);
3076
3077	if (sc->sc_provider_open != 0) {
3078		switch (how) {
3079		case G_MIRROR_DESTROY_SOFT:
3080			G_MIRROR_DEBUG(1,
3081			    "Device %s is still open (%d).", sc->sc_name,
3082			    sc->sc_provider_open);
3083			return (EBUSY);
3084		case G_MIRROR_DESTROY_DELAYED:
3085			G_MIRROR_DEBUG(1,
3086			    "Device %s will be destroyed on last close.",
3087			    sc->sc_name);
3088			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
3089				if (disk->d_state ==
3090				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3091					g_mirror_sync_stop(disk, 1);
3092				}
3093			}
3094			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_CLOSEWAIT;
3095			return (EBUSY);
3096		case G_MIRROR_DESTROY_HARD:
3097			G_MIRROR_DEBUG(1, "Device %s is still open, so it "
3098			G_MIRROR_DEBUG(1, "Device %s is still open, so it "
3099			    "cannot be removed cleanly.", sc->sc_name);
3100	}
3101
3102	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
3103		sx_xunlock(&sc->sc_lock);
3104		return (0);
3105	}
3106	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
3107	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DRAIN;
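	/*
	 * Wake up the worker so that it notices the DESTROY flag, then
	 * wait for it to clear sc_worker on its way out.
	 */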
3108	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
3109	sx_xunlock(&sc->sc_lock);
3110	mtx_lock(&sc->sc_queue_mtx);
3111	wakeup(sc);
3112	mtx_unlock(&sc->sc_queue_mtx);
3113	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
3114	while (sc->sc_worker != NULL)
3115		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
3116	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
3117	sx_xlock(&sc->sc_lock);
3118	g_mirror_destroy_device(sc);
3119	return (0);
3120}
3121
3122static void
3123g_mirror_taste_orphan(struct g_consumer *cp)
3124{
3125
3126	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
3127	    cp->provider->name));
3128}
3129
3130static struct g_geom *
3131g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
3132{
3133	struct g_mirror_metadata md;
3134	struct g_mirror_softc *sc;
3135	struct g_consumer *cp;
3136	struct g_geom *gp;
3137	int error;
3138
3139	g_topology_assert();
3140	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
3141	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
3142
3143	gp = g_new_geomf(mp, "mirror:taste");
3144	/*
3145	 * This orphan function should never be called.
3146	 */
3147	gp->orphan = g_mirror_taste_orphan;
3148	cp = g_new_consumer(gp);
3149	g_attach(cp, pp);
3150	error = g_mirror_read_metadata(cp, &md);
3151	g_detach(cp);
3152	g_destroy_consumer(cp);
3153	g_destroy_geom(gp);
3154	if (error != 0)
3155		return (NULL);
3156	gp = NULL;
3157
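	/*
	 * Reject providers that don't match the name or size hardcoded in
	 * the metadata, and components explicitly marked inactive.
	 */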
3158	if (md.md_provider[0] != '\0' &&
3159	    !g_compare_names(md.md_provider, pp->name))
3160		return (NULL);
3161	if (md.md_provsize != 0 && md.md_provsize != pp->mediasize)
3162		return (NULL);
3163	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
3164		G_MIRROR_DEBUG(0,
3165		    "Device %s: provider %s marked as inactive, skipping.",
3166		    md.md_name, pp->name);
3167		return (NULL);
3168	}
3169	if (g_mirror_debug >= 2)
3170		mirror_metadata_dump(&md);
3171
3172	/*
3173	 * Let's check if the device already exists.
3174	 */
3175	sc = NULL;
3176	LIST_FOREACH(gp, &mp->geom, geom) {
3177		sc = gp->softc;
3178		if (sc == NULL)
3179			continue;
3180		if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
3181			continue;
3182		if (sc->sc_sync.ds_geom == gp)
3183			continue;
3184		if (strcmp(md.md_name, sc->sc_name) != 0)
3185			continue;
3186		if (md.md_mid != sc->sc_id) {
3187			G_MIRROR_DEBUG(0, "Device %s already configured.",
3188			    sc->sc_name);
3189			return (NULL);
3190		}
3191		break;
3192	}
3193	if (gp == NULL) {
3194		gp = g_mirror_create(mp, &md, G_MIRROR_TYPE_AUTOMATIC);
3195		if (gp == NULL) {
3196			G_MIRROR_DEBUG(0, "Cannot create device %s.",
3197			    md.md_name);
3198			return (NULL);
3199		}
3200		sc = gp->softc;
3201	}
3202	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
3203	g_topology_unlock();
3204	sx_xlock(&sc->sc_lock);
3205	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_TASTING;
3206	error = g_mirror_add_disk(sc, pp, &md);
3207	if (error != 0) {
3208		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
3209		    pp->name, gp->name, error);
3210		if (LIST_EMPTY(&sc->sc_disks)) {
3211			g_cancel_event(sc);
3212			g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3213			g_topology_lock();
3214			return (NULL);
3215		}
3216		gp = NULL;
3217	}
3218	sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_TASTING;
3219	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
3220		g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3221		g_topology_lock();
3222		return (NULL);
3223	}
3224	sx_xunlock(&sc->sc_lock);
3225	g_topology_lock();
3226	return (gp);
3227}
3228
3229static void
3230g_mirror_resize(struct g_consumer *cp)
3231{
3232	struct g_mirror_disk *disk;
3233
3234	g_topology_assert();
3235	g_trace(G_T_TOPOLOGY, "%s(%s)", __func__, cp->provider->name);
3236
3237	disk = cp->private;
3238	if (disk == NULL)
3239		return;
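	/*
	 * Rewrite the metadata: it lives in the component's last sector,
	 * so its location depends on the provider's size.
	 */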
3240	g_topology_unlock();
3241	g_mirror_update_metadata(disk);
3242	g_topology_lock();
3243}
3244
3245static int
3246g_mirror_destroy_geom(struct gctl_req *req __unused,
3247    struct g_class *mp __unused, struct g_geom *gp)
3248{
3249	struct g_mirror_softc *sc;
3250	int error;
3251
3252	g_topology_unlock();
3253	sc = gp->softc;
3254	sx_xlock(&sc->sc_lock);
3255	g_cancel_event(sc);
3256	error = g_mirror_destroy(gp->softc, G_MIRROR_DESTROY_SOFT);
3257	if (error != 0)
3258		sx_xunlock(&sc->sc_lock);
3259	g_topology_lock();
3260	return (error);
3261}
3262
3263static void
3264g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
3265    struct g_consumer *cp, struct g_provider *pp)
3266{
3267	struct g_mirror_softc *sc;
3268
3269	g_topology_assert();
3270
3271	sc = gp->softc;
3272	if (sc == NULL)
3273		return;
3274	/* Skip synchronization geom. */
3275	if (gp == sc->sc_sync.ds_geom)
3276		return;
3277	if (pp != NULL) {
3278		/* Nothing here. */
3279	} else if (cp != NULL) {
3280		struct g_mirror_disk *disk;
3281
3282		disk = cp->private;
3283		if (disk == NULL)
3284			return;
3285		g_topology_unlock();
3286		sx_xlock(&sc->sc_lock);
3287		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
3288		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3289			sbuf_printf(sb, "%s<Synchronized>", indent);
3290			if (disk->d_sync.ds_offset == 0)
3291				sbuf_printf(sb, "0%%");
3292			else {
3293				sbuf_printf(sb, "%u%%",
3294				    (u_int)((disk->d_sync.ds_offset * 100) /
3295				    sc->sc_provider->mediasize));
3296			}
3297			sbuf_printf(sb, "</Synchronized>\n");
3298			if (disk->d_sync.ds_offset > 0) {
3299				sbuf_printf(sb, "%s<BytesSynced>%jd"
3300				    "</BytesSynced>\n", indent,
3301				    (intmax_t)disk->d_sync.ds_offset);
3302			}
3303		}
3304		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
3305		    disk->d_sync.ds_syncid);
3306		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent,
3307		    disk->d_genid);
3308		sbuf_printf(sb, "%s<Flags>", indent);
3309		if (disk->d_flags == 0)
3310			sbuf_printf(sb, "NONE");
3311		else {
3312			int first = 1;
3313
3314#define	ADD_FLAG(flag, name)	do {					\
3315	if ((disk->d_flags & (flag)) != 0) {				\
3316		if (!first)						\
3317			sbuf_printf(sb, ", ");				\
3318		else							\
3319			first = 0;					\
3320		sbuf_printf(sb, name);					\
3321	}								\
3322} while (0)
3323			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
3324			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
3325			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
3326			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
3327			    "SYNCHRONIZING");
3328			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
3329			ADD_FLAG(G_MIRROR_DISK_FLAG_BROKEN, "BROKEN");
3330#undef	ADD_FLAG
3331		}
3332		sbuf_printf(sb, "</Flags>\n");
3333		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
3334		    disk->d_priority);
3335		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
3336		    g_mirror_disk_state2str(disk->d_state));
3337		sx_xunlock(&sc->sc_lock);
3338		g_topology_lock();
3339	} else {
3340		g_topology_unlock();
3341		sx_xlock(&sc->sc_lock);
3342		sbuf_printf(sb, "%s<Type>", indent);
3343		switch (sc->sc_type) {
3344		case G_MIRROR_TYPE_AUTOMATIC:
3345			sbuf_printf(sb, "AUTOMATIC");
3346			break;
3347		case G_MIRROR_TYPE_MANUAL:
3348			sbuf_printf(sb, "MANUAL");
3349			break;
3350		default:
3351			sbuf_printf(sb, "UNKNOWN");
3352			break;
3353		}
3354		sbuf_printf(sb, "</Type>\n");
3355		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
3356		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
3357		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid);
3358		sbuf_printf(sb, "%s<Flags>", indent);
3359		if (sc->sc_flags == 0)
3360			sbuf_printf(sb, "NONE");
3361		else {
3362			int first = 1;
3363
3364#define	ADD_FLAG(flag, name)	do {					\
3365	if ((sc->sc_flags & (flag)) != 0) {				\
3366		if (!first)						\
3367			sbuf_printf(sb, ", ");				\
3368		else							\
3369			first = 0;					\
3370		sbuf_printf(sb, name);					\
3371	}								\
3372} while (0)
3373			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC");
3374			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
3375#undef	ADD_FLAG
3376		}
3377		sbuf_printf(sb, "</Flags>\n");
3378		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
3379		    (u_int)sc->sc_slice);
3380		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
3381		    balance_name(sc->sc_balance));
3382		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
3383		    sc->sc_ndisks);
3384		sbuf_printf(sb, "%s<State>", indent);
3385		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
3386			sbuf_printf(sb, "%s", "STARTING");
3387		else if (sc->sc_ndisks ==
3388		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
3389			sbuf_printf(sb, "%s", "COMPLETE");
3390		else
3391			sbuf_printf(sb, "%s", "DEGRADED");
3392		sbuf_printf(sb, "</State>\n");
3393		sx_xunlock(&sc->sc_lock);
3394		g_topology_lock();
3395	}
3396}
3397
3398static void
3399g_mirror_shutdown_post_sync(void *arg, int howto)
3400{
3401	struct g_class *mp;
3402	struct g_geom *gp, *gp2;
3403	struct g_mirror_softc *sc;
3404	int error;
3405
3406	if (panicstr != NULL)
3407		return;
3408
3409	mp = arg;
3410	g_topology_lock();
3411	g_mirror_shutdown = 1;
3412	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
3413		if ((sc = gp->softc) == NULL)
3414			continue;
3415		/* Skip synchronization geom. */
3416		if (gp == sc->sc_sync.ds_geom)
3417			continue;
3418		g_topology_unlock();
3419		sx_xlock(&sc->sc_lock);
3420		g_mirror_idle(sc, -1);
3421		g_cancel_event(sc);
3422		error = g_mirror_destroy(sc, G_MIRROR_DESTROY_DELAYED);
3423		if (error != 0)
3424			sx_xunlock(&sc->sc_lock);
3425		g_topology_lock();
3426	}
3427	g_topology_unlock();
3428}
3429
3430static void
3431g_mirror_init(struct g_class *mp)
3432{
3433
3434	g_mirror_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync,
3435	    g_mirror_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST);
3436	if (g_mirror_post_sync == NULL)
3437		G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event.");
3438}
3439
3440static void
3441g_mirror_fini(struct g_class *mp)
3442{
3443
3444	if (g_mirror_post_sync != NULL)
3445		EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_post_sync);
3446}
3447
3448DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
3449